In [1]:
from termcolor import colored
from sklearn.tree import DecisionTreeClassifier
import missingno as msno
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn import metrics
import pickle
import pprint
from sklearn.ensemble import RandomForestRegressor
from pandas_profiling import ProfileReport
from dateutil import relativedelta
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split
from statsmodels.regression.linear_model import OLS

# Master switch read by the plot_* helpers below: when it is falsy they
# return immediately without drawing anything.
plot_______ = True

def new_line():
    """Print a dashed separator line with a blank line before and after it."""
    separator = "\n-------------------------\n"
    print(separator)

def RMSE(predictions, actual=None):
    """Return the root-mean-squared error of ``predictions``, rounded to an integer.

    Parameters
    ----------
    predictions : array-like
        Predicted target values.
    actual : array-like, optional
        True target values.  When omitted, the module-level ``test_y`` is
        used, which is how the existing callers in this script rely on it.

    Returns
    -------
    int
        ``round(sqrt(mean((actual - predictions) ** 2)))``.
    """
    if actual is None:
        # Backward-compatible default: fall back to the global hold-out target.
        actual = test_y
    squared_errors = (actual - predictions) ** 2
    return round(np.sqrt(squared_errors.mean()))

def plot_numerical_columns(col_name):
    """Draw four diagnostic plots for one numerical column of the global ``df``.

    The plots are, in order: a histogram, the values against the row index,
    the sorted values against their rank, and a box plot.  The first three
    carry a red marker for the mean and a green marker for the median.

    Nothing is drawn (and ``None`` is returned) when the module-level
    ``plot_______`` flag is falsy.

    Parameters
    ----------
    col_name : str
        Name of the column of ``df`` to plot.
    """
    if not plot_______:
        return None

    values = df[col_name]
    mean_value = values.mean()
    median_value = values.median()

    def _mark_mean_and_median(draw_line):
        # ``draw_line`` is plt.axvline or plt.axhline; both take the position
        # as their first argument.  Explicit labels keep the legend correct
        # regardless of the order in which matplotlib collects the artists.
        draw_line(mean_value, color='red', label='Mean')
        draw_line(median_value, color='green', label='Median')
        plt.legend()

    # Histogram: the values run along the x axis, so the mean/median markers
    # have to be vertical lines (horizontal lines would sit on the count axis).
    values.plot(kind="hist", figsize=(13, 8), label='Actual')
    plt.title(col_name, size=18)
    _mark_mean_and_median(plt.axvline)
    plt.show()

    # Line plot of the values against the row index.
    values.plot(figsize=(13, 8), label='Actual')
    plt.title(col_name, size=18)
    _mark_mean_and_median(plt.axhline)
    plt.show()

    # Line plot of the sorted values against their rank.
    values.sort_values().reset_index(drop=True).plot(figsize=(13, 8), label='Actual')
    plt.title(col_name + " (SORTED)", size=18)
    _mark_mean_and_median(plt.axhline)
    plt.show()

    # Box plot.
    values.plot(kind="box", figsize=(13, 8))
    plt.title(col_name, size=18)
    plt.xlabel("")
    plt.show()

def plot_date_columns(col_name):
    """Draw five plots describing the datetime column ``col_name`` of the global ``df``.

    In order: the dates against the row index, the sorted dates against their
    rank, and bar charts with the percentage of rows per year, per month and
    per day of month.  Returns ``None`` without drawing when the module-level
    ``plot_______`` flag is falsy.
    """
    if not plot_______:
        return None

    dates = df[col_name]
    figure_options = dict(figsize=(15, 7), grid=True)

    def _label_and_show(x_label, y_label, title):
        # Shared axis labelling / title / display step of every plot below.
        plt.xlabel(x_label, size=14)
        plt.ylabel(y_label, size=14)
        plt.title(title, size=18)
        plt.show()

    dates.plot(**figure_options)
    _label_and_show("Index", "Date", col_name + " Graph")

    dates.sort_values().reset_index(drop=True).plot(**figure_options)
    _label_and_show("Index (sorted)", "Year", col_name + " Graph")

    # Percentage of all rows of df falling into each year / month / day.
    year_share = dates.dt.year.value_counts(sort=False).sort_index() / len(df) * 100
    year_share.plot(kind="bar", **figure_options)
    _label_and_show("Year", "Ratio (1-100)", col_name + " year Frequency Graph")

    month_share = dates.dt.month.value_counts().sort_index() / len(df) * 100
    month_share.plot(kind="bar", **figure_options)
    _label_and_show("Month", "Ratio (1-100)", col_name + " month Frequency Graph")

    day_share = dates.dt.day.value_counts().sort_index() / len(df) * 100
    day_share.plot(kind="bar", **figure_options)
    _label_and_show("Day", "Ratio (1-100)", col_name + " Day Frequency Graph")

def plot_catagorical_columns(cat_variable):
    """Bar-plot the percentage of rows of the global ``df`` per category of ``cat_variable``.

    Returns ``None`` without drawing when the module-level ``plot_______``
    flag is falsy.
    """
    if not plot_______:
        return None

    category_share = df[cat_variable].value_counts() / len(df) * 100
    category_share.plot.bar(figsize=(15, 6), grid=True)

    axis_label_style = dict(size=14, color='r')
    plt.title(cat_variable, size=18, color='r')
    plt.xlabel("Catagory", **axis_label_style)
    plt.ylabel("Ratio (1-100)", **axis_label_style)
    plt.show()

def data_shape():
    """Return a short text summary of the row and column counts of the global ``df``."""
    row_count, column_count = df.shape
    return f"The Data have:\n\t{row_count} rows\n\t{column_count} columns\n"
#===
# Alternative inputs used during development (kept for reference):
# df = pd.read_csv("data.csv", date_parser=True)

# df = pd.read_csv("df_only_selected_columns_using_PCA.csv", date_parser=True)
# target_variable = "ACTUAL_WORTH"
# df = pd.concat([
#         df.select_dtypes("number").iloc[:, :3],
#         df.select_dtypes("O").iloc[:, :3],
#         df.select_dtypes(exclude=["number", "O"]),
#         df[[target_variable]]], 1)
# target_variable = "AREA_NAME_EN"

# df = pd.read_csv("cleaned_data.csv", date_parser=True)
# target_variable = "SalePrice"

# Load the train and test files and stack them into one frame `df`.
# The target column exists only in the train file, so it is removed before
# stacking and re-attached afterwards, padded with missing values for the
# test rows.
train = pd.read_csv("/home/amir/Downloads/train.csv")
test  = pd.read_csv("/home/amir/Downloads/test.csv")
target_variable = "SalePrice"
train_y = train[target_variable]
train = train.drop(columns=target_variable)
# ignore_index=True gives the stacked frame a fresh 0..n-1 index; without it
# the test rows repeat the index labels of the train rows, which makes any
# later label-based alignment ambiguous.
df = pd.concat([train, test], ignore_index=True)
df[target_variable] = train_y.to_list() + [None]*len(test)
#===
# Report the overall size of the data.
new_line()
print(data_shape())
#===
# Report (and plot) how many columns there are of each dtype.
new_line()
dtype_counts = df.dtypes.value_counts()
print(f"Columns types distribution:\n\n{dtype_counts}\n")
dtype_counts.plot(kind='barh', figsize=(10, 2), grid=True, title="Variable types Count Graph")
plt.xlabel("Count")
plt.show()
#===
# Rows without a target value cannot be used further on: report and drop them.
target_is_missing = df[target_variable].isna()
missing_target_count = target_is_missing.sum()
if missing_target_count:
    new_line()
    to_print = f"There are {missing_target_count} NAs in target values, we droped those rows"
    print(colored(to_print, 'red'))
    df = df[~target_is_missing]
del target_is_missing, missing_target_count
#---------------------------------------------------
# df.select_dtypes("O").columns[:5]
# D = df.select_dtypes(exclude="O")
# D2 = df.select_dtypes("O").iloc[:,:5]
# df = pd.concat([D, D2], 1)

# profile = ProfileReport(df, title='Pandas Profiling Report', explorative=True)
# profile.to_file("your_report.html")
#---------------------------------------- NA
# `a` holds the NA count of every column that has at least one NA
# (it is reused by the following cells).
na_count_per_column = df.isna().sum()
a = na_count_per_column[na_count_per_column > 0].astype(float)
if a.size:
    new_line()
    na_column_share = round(len(a)/df.shape[1]*100)
    to_print = f"There are {len(a)} (out of {df.shape[1]}, [{na_column_share}%]) columns that contains 1 or more NA."
    print(colored(to_print, 'red'))

    # For every column with NAs add a companion column flagging which rows
    # were missing, so that information survives the later imputation.
    indicator_labels = {True : "Missing", False : "Not missing"}
    for na_column in a.index:
        df[na_column+"_NA_indicator"] = df[na_column].isna().map(indicator_labels)
    new_line()
    to_print = f"{a.size} NA_indicator variables added to the data\n"
    print(colored(to_print, 'red'))


    print("========= NA Graphs =========\n")
    msno.matrix(df)
    plt.title("NA Graph")
    plt.show()

    new_line()
    sns.heatmap(df.isnull(), cbar=False)
    plt.title("NA Graph")
    plt.show()
#===
# Turn the NA counts into percentages of rows (ascending).
a = a.sort_values()/len(df)*100
fully_missing_columns = a[a == 100].index
if len(fully_missing_columns):
    # Columns that are 100% missing carry no information: drop and list them.
    new_line()
    df.drop(columns=fully_missing_columns, inplace=True)
    to_print = f"There are {len(fully_missing_columns)} columns that are all Missing values, so we droped those.\nNow {data_shape()}\n\nDropped columns names:"
    print(colored(to_print, 'red'))
    for dropped_column in fully_missing_columns:
        print("\t",dropped_column)
    a = a[a != 100]
#===
# dtype distribution of the columns that still contain NAs.
x = df[a.index].dtypes.value_counts()
if x.size:
    new_line()
    print(f"NA columns data type Distribution:\n\n{x}")
del x
#===
new_line()
if not a.size:
    print(colored("Now There is no NaN value in our Data", 'red'))
else:
    print(f"NaN Ratio (0-100)\n\n{a}")
#===
# ----------------------------------------------- Imputing Missing values
# ------------------------------------ Numerical columns imputing
if df.select_dtypes("number").isna().sum().sum():
    from sklearn.impute import KNNImputer

    def _missing_values_breakdown():
        # Text block with the total NA count and its split by column kind.
        total = df.isna().sum().sum()
        in_object = df.select_dtypes("O").isna().sum().sum()
        in_number = df.select_dtypes("number").isna().sum().sum()
        in_others = df.select_dtypes(exclude=["O", "number"]).isna().sum().sum()
        return f'There are {total} Missing values:\n\t{in_object} in catagorical variables\n\t{in_number} in numerical columns\n\t{in_others} in others'

    new_line()
    print('(Before Missing values treatment)\n' + _missing_values_breakdown())

    # Split off the numeric part, fill its NAs with a 4-nearest-neighbours
    # imputer, then glue both parts back together side by side.  Both parts
    # get a fresh 0..n-1 index so that they line up row by row.
    df_not_a_number = df.select_dtypes(exclude="number").reset_index(drop=True)
    df_number = df.select_dtypes("number")
    del df
    imputer = KNNImputer(n_neighbors=4, weights="uniform")
    imputed = imputer.fit_transform(df_number)
    df_number = pd.DataFrame(imputed, columns=df_number.columns)
    df = pd.concat([df_not_a_number, df_number], axis=1)
    del df_not_a_number
    del df_number

    print('\n(After filling numeric missing values)\n' + _missing_values_breakdown())
#===
# -------------------------------- Catagoriacal variables imputating
# Object columns that contain NAs, ordered from the least to the most affected.
vars_to_fill = df.select_dtypes("O").isna().mean().where(lambda x:x>0).dropna().sort_values(ascending=True)
if vars_to_fill.size:
    for col in vars_to_fill.index:
        # Predictors: every column that currently has no NA at all.  This is
        # recomputed on each pass, so columns filled earlier in the loop
        # become predictors for the later ones.
        complete_columns = df.loc[:, df.isna().sum() == 0]
        features = pd.concat(
            [
                complete_columns.select_dtypes("number"),
                pd.get_dummies(complete_columns.select_dtypes(exclude="number"), prefix_sep="__"),
            ],
            axis=1,  # keyword form: the positional axis argument is deprecated
        )

        # Fit a decision tree on the rows where `col` is known and use it to
        # predict the rows where it is missing.
        value_is_missing = df[col].isna()
        imputation_model = DecisionTreeClassifier().fit(
            features[~value_is_missing], df.loc[~value_is_missing, col]
        )
        df.loc[value_is_missing, col] = imputation_model.predict(features[value_is_missing])
    new_line()
    print(f"Missing values imputed, Now there are {df.isna().sum().sum()} Missing values")
# ----------------------------------------------- END Imputing Missing values
# --------------------------------------------------------- Unique values
# Columns with a single distinct value carry no information: drop them.
# The target is never a candidate here; dropping it would break every later
# step that reads df[target_variable].
unique_counts = df.drop(columns=target_variable, errors="ignore").nunique()
only_one_unique_value = unique_counts[unique_counts == 1]
del unique_counts
if only_one_unique_value.size:
    new_line()
    df.drop(columns=only_one_unique_value.index, inplace=True)
    last_ = ("", "it") if  only_one_unique_value.size == 1 else ("s", "those")
    to_print = f"There are {only_one_unique_value.size} variable{last_[0]} That have only one unique value, so we droped {last_[1]}.\nDropped column{last_[0]} name{last_[0]} (in order):"
    print(colored(to_print, 'red'))
    for i in only_one_unique_value.index.sort_values():
        print(i)
    new_line()
    print(f"\nNow {data_shape()}")
del only_one_unique_value
# #===
# Columns in which no value repeats (identifier-like columns) are dropped as
# well.  Again the target is excluded: a continuous target without ties is
# all-unique too and must be kept.
has_only_unique_values = df.drop(columns=target_variable, errors="ignore").apply(lambda x:x.is_unique)
all_values_are_unique = has_only_unique_values[has_only_unique_values == True]
del has_only_unique_values
if all_values_are_unique.size:
    new_line()
    df.drop(columns=all_values_are_unique.index, inplace=True)
    last_ = ("", "it") if  all_values_are_unique.size == 1 else ("s", "those")
    to_print = f"There are {all_values_are_unique.size} column{last_[0]} that have all unique values, so no value repeatation, we droped {last_[1]} column{last_[0]}.\nDropped column{last_[0]} name{last_[0]} are:\n"
    print(colored(to_print, 'red'))
    for i in all_values_are_unique.index:
        print("\t", i)
    new_line()
    print(f"Now {data_shape()}")
del all_values_are_unique
#===
date_columns = []
def DTYPES():
    """Classify the columns of the global ``df`` as Object, Number or Date.

    Every object column is passed through ``pd.to_datetime``; columns that
    parse are converted in place in ``df`` and classified as Date.  Columns
    that already have a datetime dtype are classified as Date as well.

    Side effects
    ------------
    * converts the parseable object columns of ``df`` to datetime;
    * rebinds the module-level ``date_columns`` (a ``pd.Index`` of the date
      columns, or an empty list when there are none);
    * prints a warning for columns that fall into more than one, or into
      none, of the three groups.

    Returns
    -------
    pandas.DataFrame
        One row per classified column, with the columns ``Column`` and
        ``dtype`` (``'Object'``, ``'Number'`` or ``'Date'``).
    """
    global date_columns
    catagorical_columns = df.select_dtypes("O").columns
    numerical_columns   = df.select_dtypes("number").columns
    # Columns that are already datetime need no parsing but still are dates.
    found_dates = df.select_dtypes(include=["datetime", "datetimetz"]).columns.to_list()

    for i in catagorical_columns:
        try:
            df[i] = pd.to_datetime(df[i])
        except (ValueError, TypeError, OverflowError):
            # Not parseable as dates: the column stays categorical.
            continue
        found_dates.append(i)

    date_index = pd.Index(found_dates, dtype="object")
    catagorical_columns = catagorical_columns.drop(
        [c for c in found_dates if c in catagorical_columns]
    )
    # Keep the historical shape of the global: Index when non-empty, else [].
    date_columns = date_index if found_dates else []
    #===
    classified = catagorical_columns.append(numerical_columns).append(date_index)
    dtypes = pd.DataFrame({"Column" : classified,
                "dtype" : ['Object']*len(catagorical_columns) + ['Number']*len(numerical_columns) + ['Date']*len(date_index)})

    if not classified.is_unique:
        new_line()
        print(colored("Some column/s repated in > 1 dtypes\n", 'red'))
        repeated = dtypes.Column[dtypes.Column.duplicated()]
        print(dtypes[dtypes.Column.isin(repeated)].to_string())
    #===
    x = df.columns.difference(classified)
    if x.size:
        new_line()
        print(colored("Some columns not included in any existing catagory, those:\n", 'red'))
        for i in x:
            print(f"\t<{i}, with dtype of <{df[i].dtype}>")
    #===
    return dtypes
#===
dtypes = DTYPES()
# ----------------------------------------------------------------------- Feature enginearing
# ======= Adding date columns
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> add polynomial, sqrt, tree, log features
def add_new_date_cols(x, suffix):
    """Derive calendar features from a datetime column and add them to the global ``df``.

    Parameters
    ----------
    x : pandas.Series
        Datetime column of ``df`` (its ``name`` is the column that gets
        dropped from the result).
    suffix : str
        Text put in FRONT of every generated column name (despite the
        parameter name it is used as a prefix), e.g. ``<suffix>_month_name``.

    Returns
    -------
    pandas.DataFrame
        ``df`` without the source column ``x.name``.  Generated features are
        written into the global ``df`` itself, but only those that have more
        than one distinct value.
    """
    def _as_quoted_label(numbers):
        # Whole numbers as quoted text ('"2008"'); NaN stays NaN.  The ".0"
        # removal handles values stored as floats (e.g. 2008.0 -> "2008").
        labels = numbers.apply(lambda v: np.nan if np.isnan(v) else str(v).replace(".0", ""))
        return '"' + labels + '"'

    # Series.dt.week is deprecated (and absent from recent pandas); the ISO
    # calendar accessor gives the same week number.  It is cast to float so
    # that missing dates become NaN, exactly like the other date parts.
    try:
        week = x.dt.isocalendar().week.astype("float64")
    except AttributeError:
        # pandas too old to have isocalendar(): fall back to the old accessor.
        week = x.dt.week

    d = {}
    d[suffix + '_week_normalized'] = week / 52
    d[suffix + '_week_str'] = _as_quoted_label(week)

    d[suffix + '_year_after_min_year'] = x.dt.year - x.dt.year.min()
    d[suffix + '_year_str'] = _as_quoted_label(x.dt.year)

    d[suffix + '_day_name']  = x.dt.day_name()

    # Number of days since the earliest date, as quoted text.
    d[suffix + '_day_after_min_date_str']  = '"' + (x - x.min()).apply(lambda delta: str(delta).split()[0]) + '"'

    d[suffix + '_day_normalized'] = x.dt.day / 31

    d[suffix + '_hour_normalized'] = x.dt.hour / 24
    d[suffix + '_hour_str'] = _as_quoted_label(x.dt.hour)

    d[suffix + '_month_name'] = x.dt.month_name()
    d[suffix + '_month_normalized'] = x.dt.month/12

    # Constant features are useless, so only varying ones are kept.
    for k, v in d.items():
        if v.nunique() > 1:
            df[k] = v
    return df.drop(columns=x.name)

# Replace every date column by the calendar features derived from it.
len_df_before_adding_date_vars = df.shape[1]
for date_col in date_columns:
    df = add_new_date_cols(df[date_col], date_col)
len_df_after_adding_date_vars  = df.shape[1]
if len_df_after_adding_date_vars > len_df_before_adding_date_vars:
    new_line()
    to_print = f"Added {len_df_after_adding_date_vars - len_df_before_adding_date_vars} date Features"
    print(colored(to_print, 'red'))

# ======= type casting of numerical variable (those who have < 4% unique values) to catagorical variables
# The target is excluded: a text copy of the target among the features would
# leak the answer into every model fitted later.
unique_value_share = df.select_dtypes("number").nunique() / len(df) * 100
f = unique_value_share[unique_value_share < 4].index.drop(target_variable, errors="ignore")
del unique_value_share
if f.size:
    for col_num_to_str in f:
        df[col_num_to_str+"_str"] = '"' + df[col_num_to_str].astype(str) + '"'
    new_line()
    to_print = f"Added {f.size} String Features (Extracted from numerical variables)"
    print(colored(to_print, 'red'))
# =======
def cluping_rare_cases_in_one_catagory(x):
    """Merge the rare categories of column ``x`` of the global ``df`` into "Rare cases".

    Every category that occurs fewer than 10 times is replaced by the label
    ``"Rare cases"``.  If that merged category itself ends up with fewer than
    8 rows, those rows get the most frequent value of the column instead.

    Parameters
    ----------
    x : str
        Name of the column of ``df`` to process.

    Returns
    -------
    pandas.Series or None
        The processed column.  ``None`` when the processed column has only
        one distinct value left; in that case the column is dropped from the
        global ``df`` and a message is printed.
    """
    global df
    x = df[x]

    category_counts = x.value_counts()
    rare_values = category_counts[category_counts < 10].index.to_list()
    if rare_values:
        x = x.replace(rare_values, "Rare cases")

    # Count the merged category directly instead of reading the last entry of
    # value_counts(): that relied on positional fallback of label indexing
    # and failed on an empty or integer-labelled index.
    is_rare = x == "Rare cases"
    if 0 < is_rare.sum() < 8:
        # Too few rows even after merging: use the most common value instead.
        x[is_rare] = x.mode()[0]

    if x.nunique() == 1:
        new_line()
        to_print = f"The column <{x.name}> have only one unique value, We droped it from the data."
        print(colored(to_print, 'red'))
        df.drop(columns=x.name, inplace=True)
        return None
    return x

# Clump the rare categories of every object column.  The helper returns None
# (instead of a Series) for a column it dropped itself.
object_column_names = df.select_dtypes("O").columns
for var in object_column_names:
    m = cluping_rare_cases_in_one_catagory(var)
    if not isinstance(m, pd.Series):
        continue
    df[var] = m
new_line()


# Per column: how many rows ended up in the "Rare cases" category.
rare_case_counts = (df == 'Rare cases').sum().sort_values()
xx = rare_case_counts.where(rare_case_counts > 0).dropna()
xx = pd.DataFrame({
    "Count" : xx,
    "Ratio" : (xx/len(df)*100).round(4),
})
print(f"<Rare case> catagory:\n{xx.to_string()}")
# ----------------------------------------------------------------------- END (Feature enginearing)
# Re-classify the columns: feature engineering above added and removed some.
dtypes = DTYPES()
# ---------------------------------------------------- Correlation plot
new_line()
# Absolute pairwise correlations of the numeric columns (reused further down).
cor_df = df.select_dtypes('number').corr().abs()
# Hide the upper triangle (and diagonal): the matrix is symmetric.
mask = np.triu(np.ones(cor_df.shape, dtype=bool))
f, ax = plt.subplots(figsize=(17, 10))
cmap = sns.color_palette("viridis", as_cmap=True)
plot_ = sns.heatmap(
    cor_df,
    mask=mask,
    cmap=cmap,
    vmax=.3,
    square=True,
    linewidths=.5,
    cbar_kws={"shrink": .5},
    ax=ax,
)
ax.set_title("abs (Correlation) plot", fontsize=25)
plt.show()
# ---------------------------------------------------------------------
#===
# m = 0
# Per-column report.  `dtypes` has one row per column with its group
# ('Object', 'Number' or 'Date'); `cor_df` holds the absolute correlations of
# the numeric columns.  Uninformative columns are dropped from `df` on the way.
for column_name, type_ in zip(dtypes["Column"], dtypes["dtype"]):
    x = df[column_name]
    to_print = f"\n\n\n========================================= {column_name} =========================================\n\n"
    print(colored(to_print, 'red'))

    # Flag columns that are a one-to-one relabelling of this one: same number
    # of distinct values, and every value pairs with exactly one partner.
    distinct_count = x.nunique()
    for col_ in df.columns:
        if col_ == column_name or df[col_].nunique() != distinct_count:
            continue
        unique_combination = df[[col_, column_name]].drop_duplicates()
        if unique_combination.apply(lambda s: s.is_unique).sum() == 2:
            new_line()
            to_print = f"This Columns is duplicate of <{col_}> column"
            print(colored(to_print, 'red'))

    print(f"Column Type     : ", end="")
    print(colored(type_, 'red'))
    if x.isna().all():
        new_line()
        df.drop(columns=column_name, inplace=True)
        print(colored("We dropped This column, because it is all Empty", 'red'))
        continue
    # The labels must match the ones DTYPES() produces ('Object', not 'O').
    if type_ in ["Object", "Date"] and x.is_unique:
        new_line()
        df.drop(columns=column_name, inplace=True)
        to_print = f"We dropped This column, because it's a {type_} columns, and it's all values are unique"
        print(colored(to_print, 'red'))
        continue
    if distinct_count == 1:
        new_line()
        df.drop(columns=column_name, inplace=True)
        print(colored("We dropped This column, because There is only one unique value", 'red'))
        continue

    if type_ == "Number":
        # Correlations with the other numeric columns, ascending.  Undefined
        # (NaN) correlations are removed so they cannot end up in the top 3.
        sorted_cor = cor_df[column_name].drop(column_name).dropna().sort_values()
        if not sorted_cor.empty:
            if np.isclose(sorted_cor.iloc[-1], 1):
                new_line()
                to_print = f"This column is perfactly correlated with column <{sorted_cor.index[-1]}, so remove one of them"
                print(colored(to_print, 'red'))

            new_line()
            xm = sorted_cor.tail(3).rename("Correlation").rename_axis("Column name").to_frame()
            xm.plot(kind='barh', grid=True, figsize=(10,1.5))
            plt.title("Most 3 correlated features with this columns (sorted)", size=14)
            plt.xlabel("Correlation", size=12)
            plt.show()

        new_line()
        skewness = x.skew(skipna = True)
        if abs(skewness) < 0.5:
            print(f"The data is fairly symmetrical (skewness is: {skewness})")
        elif abs(skewness) < 1:
            print(f"The data are moderately skewed (skewness is: {skewness})")
        else:
            to_print = f"The data are highly skewed (skewness is: {skewness})\nNote: When skewness exceed |1| we called it highly skewed"
            print(colored(to_print, 'red'))

        # Summary table.  "Outlies" counts values more than 3 standard
        # deviations away from the mean.
        outlier_count = (((x - x.mean())/x.std()).abs() > 3).sum()
        ff = [x.count(), x.isna().sum(), x.mean(), x.std(), x.min()]
        ff += x.quantile([.25,.5,.75]).to_list()
        ff += [x.max(), distinct_count, outlier_count, (x < 0).sum(), (x == 0).sum()]

        f = pd.DataFrame(ff, index=['Count', 'NA', 'Mean', 'Std', 'Min', '25%', '50%', '75%', 'Max', 'Nunique', 'Outlies', 'Nagetive', 'Zeros'], columns=['Count'])
        f['Ratio'] = f['Count'] / x.count() * 100
        # A ratio is meaningless for the location/spread statistics.
        f.loc['Mean' : 'Max', 'Ratio'] = None

        new_line()
        print(f.round(2).to_string())
        plot_numerical_columns(column_name)

    elif type_ == "Object":
        value_frequencies = x.value_counts()
        l = (
            x.count(),
            distinct_count,
            len(x),
            x.isna().sum(),
            (x == x.mode().values[0]).sum(),
            (x == value_frequencies.tail(1).index[0]).sum(),
            (value_frequencies == 1).sum(),
        )
        f = pd.DataFrame(l, index=['Count', 'Nunique', 'Len', 'NA', 'Most frequent', 'Least frequent', 'Values occured only once'], columns=['Counts'])
        f['Ratio'] = (f.Counts / x.count() * 100).round(4)
        f.loc[['Len'], 'Ratio'] = None

        new_line()
        print(f.to_string())


        # Values that differ only by letter case or by surrounding spaces
        # usually are the same category typed inconsistently.
        if x.str.lower().nunique() != distinct_count:
            new_line()
            to_print = f"Case issue\n\tin orignal variable There are {distinct_count} unique values\n\tin lower verstion there are   {x.str.lower().nunique()} unique values.\n"
            print(colored(to_print, 'red'))

        if x.str.strip().nunique() != distinct_count:
            new_line()
            to_print = f"Space issue\n\tin orignal variable There are {distinct_count} unique values\n\tin striped verstion there are {x.str.strip().nunique()} unique values."
            print(colored(to_print, 'red'))

        plot_catagorical_columns(column_name)

    elif type_ == "Date":

        new_line()
        rd = relativedelta.relativedelta( pd.to_datetime(x.max()), pd.to_datetime(x.min()))
        to_print = f"Diffrenece between first and last date:\n\tYears : {rd.years}\n\tMonths: {rd.months}\n\tDays  : {rd.days}"
        print(colored(to_print, 'red'))

        value_frequencies = x.value_counts()
        ff = (
            x.count(),
            distinct_count,
            (x == x.mode().values[0]).sum(),
            (x == value_frequencies.tail(1).index[0]).sum(),
            (value_frequencies == 1).sum(),
        )
        f = pd.DataFrame(ff, index=["Count", 'Nunique', 'Most frequent values', 'Least frequent values', 'Values occured only once count'], columns=['Counts'])
        f['Ratio'] = (f.Counts / x.count() * 100).round(4)

        new_line()
        print(f"\n{f.to_string()}")


        # Years / months / days inside the observed range that never occur.
        # They are sorted because the message promises them "in order".
        for part_name, part in (("Years", x.dt.year), ("Months", x.dt.month), ("Days", x.dt.day)):
            f = sorted(set(np.arange(part.min(), part.max()+1)).difference(part.unique()))
            if f:
                new_line()
                print(colored(f"These {part_name} (in order) are missing:\n", 'red'))
                for i in f:
                    print("\t", i, end=", ")

        new_line()
        plot_date_columns(column_name)


# ================================================================================================================ Modeling
print("\n\n")
print("----------------------------------------------------------------------------------------------")
print("****************************************** Modeling ******************************************")

# Regression problem
if df[target_variable].dtype in [float, int]:

    print("\n-------------------- This is Regression problem --------------------\n")
    print("''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''")

    df_T = df.select_dtypes("number")
    cat_cols = pd.get_dummies(df.select_dtypes(exclude="number"), prefix_sep="__")
    df_T[cat_cols.columns.to_list()] = cat_cols

    df = df_T.copy("deep")
    del df_T
    del cat_cols
    # ====
    train_X, test_X, train_y, test_y = train_test_split(df.drop(columns=target_variable), df[target_variable])
    # ====
    # --------------------------------------------------------- Linear regression
    to_print = "\n ------------------------------------- Linear Regression -------------------------------------\n"
    print(colored(to_print, 'red'))

    model_reg = OLS(train_y, train_X).fit()
    summary = model_reg.summary()
    summary_df = pd.DataFrame(summary.tables[1])
    summary_df.columns = summary_df.iloc[0]
    summary_df.drop(0, inplace=True)
    summary_df.columns = summary_df.columns.astype(str)
    summary_df.columns = ["Variable"] + summary_df.columns[1:].to_list()
    for i in summary_df.columns[1:]:
        summary_df[i] = summary_df[i].astype(str).astype(float)
    summary_df.Variable = summary_df.Variable.astype(str)
    summary_df['Indicator'] = summary_df['P>|t|'].apply(lambda x:"***" if x < 0.001 else "**" if x < 0.01 else "*" if x < 0.05 else "." if x < 0.1  else "")
    summary_df = summary_df.sort_values("Variable").reset_index(drop=True)
    summary_df.to_csv()
    new_line()
    print(colored("NOTE: This summary saved as <summary_OLS_1.csv>", 'red'))

    new_line()
    print(summary_df.to_string())
    # ============================= Model statistic
    predictions = model_reg.predict(test_X)

    new_line()
    print(colored(" --- Model statistic --- \n", 'red'))
    print(f"R-squared         : {round(model_reg.rsquared, 3)}")
    print(f"Adj. R-squared    : {round(model_reg.rsquared_adj, 3)}")
    print(f"F-statistic       : {round(model_reg.fvalue)}")
    print(f"Prob (F-statistic): {model_reg.f_pvalue}")
    print(f"No. Observations  : {round(model_reg.nobs)}")
    print(f"AIC               : {round(model_reg.aic)}")
    print(f"Df Residuals      : {round(model_reg.df_resid)}")
    print(f"BIC               : {round(model_reg.bic)}")
    print(f"RMSE (test)       : {RMSE(predictions)}")
    # ======
    f = train_X.copy("deep")
    f['Errors__'] = model_reg.resid
    f = f.corr()['Errors__'].drop("Errors__").abs().sort_values().dropna().tail(1)
    new_line()
    print(f"Maximum correlation between Reseduals and any data columns is {f.values[0]}, with columns <{f.index[0]}>")
    print(f"Mean of train reseduals: {model_reg.resid.mean()}")
    del f
    # ============================= END (Model statistic)
    # --------------------------------------------------------- END Linear regression




    # --------------------------------------------------------- Random Forest
    print("\n ------------------------------------- Random Forest -------------------------------------\n")

    rf = RandomForestRegressor(n_estimators = 200, oob_score=True)
    model_rf = rf.fit(train_X, train_y);
    predictions_rf = rf.predict(test_X)

    new_line()
    print(colored("RF model peramters:\n", 'red'))
    pprint.pprint(model_rf.get_params())

    new_line()
    importances = list(rf.feature_importances_)
    feature_importances = [(feature, round(importance, 2)) for feature, importance in zip(test_X, importances)]
    featuresImportance = pd.Series(model_rf.feature_importances_, index=train_X.columns).sort_values(ascending=False)
    if len(featuresImportance) > 30:
        featuresImportance = featuresImportance.head(30)
    featuresImportance.plot(figsize=(20,10), kind='bar', grid=True);
    plt.title("RandomForest Feature importances Graph", size=18,color='red');
    plt.xlabel("Features", size=14, color='red');
    plt.ylabel("Importance", size=14, color='red');
    plt.show();
    del featuresImportance

    new_line()
    print(colored("--- Model statistic ---", 'red'))
    # The coefficient of determination R^2 of the prediction.
    # https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html
    print(f"R^2 (test) : {rf.score(test_X, test_y)}")
    print(f"R^2 (train): {rf.score(train_X, train_y)}")
    print(f"RMSE (test): {RMSE(predictions_rf)}")
    print(f"oob score  : {model_rf.oob_score_}")

    f = test_X.copy("deep")
    errors_rf = predictions_rf - test_y
    f['Errors__'] = errors_rf
    f = f.corr()['Errors__'].drop("Errors__").abs().sort_values().dropna().tail(1)
    new_line()
    print(f"Maximum correlation between Reseduals and any data columns is {f.values[0]}, with columns <{f.index[0]}>")
    # --------------------------------------------------------- END Random Forest
elif df[target_variable].dtype == "O":
    # Classififcation problem
    if df[target_variable].nunique() == 2:
        print("\n-------------------- This is Binary Classification problem --------------------\n")
        print("''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''")
        df = pd.concat([
                        df.select_dtypes(exclude = "O"),
                        pd.get_dummies(df.drop(columns=target_variable).select_dtypes("O")),
                        df[[target_variable]]
                        ], 1)

        train_X, test_X, train_y, test_y = train_test_split(df.drop(columns=target_variable), df[target_variable])
        clf = LogisticRegression().fit(train_X, train_y)
        predictions = clf.predict_proba(test_X)
        predictions = pd.Series(predictions[:, 0])
        lst = []
        for thresh in np.linspace(predictions.min(), predictions.max(), 50)[1:]:
            pred = predictions < thresh

            pred.loc[pred == True] = clf.classes_[0]
            pred.loc[pred == False] = clf.classes_[1]

            test_y = test_y.reset_index(drop=True)

            TN = ((pred == clf.classes_[0]) & (test_y == clf.classes_[0])).sum()
            TP = ((pred == clf.classes_[1]) & (test_y == clf.classes_[1])).sum()
            FN = ((pred == clf.classes_[0]) & (test_y == clf.classes_[1])).sum()
            FP = ((pred == clf.classes_[1]) & (test_y == clf.classes_[0])).sum()

            p = TP / (TP + FP)
            r = TP / (TP + FN)
            f = 2 * ((p * r) / (p+r))

            lst.append((thresh, (pred == test_y).mean(), p, r , f))

        d = pd.DataFrame(lst, columns=["Thresold", "Accurecy(0-1)", "Precision", "Recall", "F1"])
        d = d.set_index("Thresold")
        d.plot(grid=True, figsize=(18,7));
        plt.title("Model performance at diffrent Thresolds", size=18, color='red');
        plt.xlabel("Thresold", size=14, color='red');
        plt.ylabel("");
        plt.show()
    else:
        to_print = "\n-------------------- This is Multiclass Classification problem --------------------\n"
        print(colored(to_print, 'red'))
        print("'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''")

        # Label-encode every object column (stored as digit strings, as before).
        # The encoded Series reuses the column's own index so the assignment
        # stays row-aligned even when df's index is not 0..n-1.
        object_columns = df.select_dtypes("O").columns
        df.loc[:, object_columns] = df[object_columns].apply(
            lambda x: pd.Series(
                LabelEncoder().fit_transform(x.astype(str)), index=x.index
            ).astype(str)
        )
        train_X, test_X, train_y, test_y = train_test_split(
            df.drop(columns=target_variable), df[target_variable]
        )

        clf = RandomForestClassifier(n_estimators=1000).fit(train_X, train_y)
        predictions = clf.predict(test_X)

        # Show at most the 30 most important features (head is a no-op below 30).
        feature_imp = pd.Series(clf.feature_importances_, index=train_X.columns).sort_values(ascending=False)
        feature_imp = feature_imp.head(30)
        feature_imp.plot(kind='barh', figsize=(17,10), grid=True)
        plt.title("Feature importances Graph", size=18, color='red')
        plt.xlabel("Importance", size=14, color='red')
        plt.ylabel("Feature", size=14, color='red')
        plt.show()
        # ====
        f = (test_y, predictions)
        # The binary-only metrics and plots below reject multiclass targets, so
        # they are applied only when the fitted model has exactly two classes.
        is_binary = len(clf.classes_) == 2

        print(f"accuracy_score: {metrics.accuracy_score(*f)}")
        if is_binary:
            # Unchanged two-class behaviour: integer labels with the default
            # binary averaging.
            f_int = (test_y.astype(int), predictions.astype(int))
            f1_value = metrics.f1_score(*f_int)
        else:
            # The default average="binary" raises for more than two classes;
            # use the support-weighted mean of the per-class F1 scores.
            f1_value = metrics.f1_score(*f, average="weighted")
        print(f"f1_score: {f1_value}")

        if is_binary:
            # NOTE(review): metrics.plot_roc_curve was removed in scikit-learn
            # 1.2; kept because the rest of the file targets the older API.
            metrics.plot_roc_curve(clf, test_X, test_y)
            plt.title("ROC curve plot")
            plt.show()

        # A ConfusionMatrixDisplay draws nothing until .plot() is called. One
        # plotted display replaces the never-plotted object plus the separate
        # plot_confusion_matrix call that drew the same matrix.
        metrics.ConfusionMatrixDisplay(
            metrics.confusion_matrix(*f, labels=clf.classes_),
            display_labels=clf.classes_,
        ).plot()
        plt.title("Confusion matrix")
        plt.show()

        if is_binary:
            # NOTE(review): metrics.plot_precision_recall_curve was removed in
            # scikit-learn 1.2, like plot_roc_curve above.
            metrics.plot_precision_recall_curve(clf, test_X, test_y)
            plt.title("Precision recall curve")
            plt.show()
# ================================================================================================================ END Modeling
-------------------------

The Data have:
	2919 rows
	81 columns


-------------------------

Columns types distribution:

object     43
int64      26
float64    12
dtype: int64

-------------------------

There are 1459 NAs in target values, we droped those rows

-------------------------

There are 19 (out of 81, [23%]) columns that contains 1 or more NA.

-------------------------

19 NA_indicator variables added to the data

========= NA Graphs =========

-------------------------

-------------------------

NA columns data type Distribution:

object     16
float64     3
dtype: int64

-------------------------

NaN Ratio (0-100)

Electrical       0.068493
MasVnrType       0.547945
MasVnrArea       0.547945
BsmtQual         2.534247
BsmtCond         2.534247
BsmtFinType1     2.534247
BsmtExposure     2.602740
BsmtFinType2     2.602740
GarageCond       5.547945
GarageQual       5.547945
GarageFinish     5.547945
GarageType       5.547945
GarageYrBlt      5.547945
LotFrontage     17.739726
FireplaceQu     47.260274
Fence           80.753425
Alley           93.767123
MiscFeature     96.301370
PoolQC          99.520548
dtype: float64

-------------------------

(Before Missing values treatment)
There are 6965 Missing values:
	6617 in catagorical variables
	348 in numerical columns
	0.0 in others

(After filling numeric missing values)
There are 6617 Missing values:
	6617 in catagorical variables
	0 in numerical columns
	0.0 in others

-------------------------

Missing values imputed, Now there are 0 Missing values

-------------------------

There are 1 column that have all unique values, so no value repeatation, we droped it column.
Dropped column name are:

	 Id

-------------------------

Now The Data have:
	1460 rows
	99 columns


-------------------------

Added 18 String Features (Extracted from numerical variables)

-------------------------

The column <Street> have only one unique value, We droped it from the data.

-------------------------

The column <Utilities> have only one unique value, We droped it from the data.

-------------------------

The column <Electrical_NA_indicator> have only one unique value, We droped it from the data.

-------------------------

The column <PoolQC_NA_indicator> have only one unique value, We droped it from the data.

-------------------------

The column <PoolArea_str> have only one unique value, We droped it from the data.

-------------------------

<Rare case> catagory:
                         Count   Ratio
HouseStyle                 8.0  0.5479
MasVnrType_NA_indicator    8.0  0.5479
MasVnrArea_NA_indicator    8.0  0.5479
FullBath_str               9.0  0.6164
Foundation                 9.0  0.6164
RoofStyle                  9.0  0.6164
MiscFeature                9.0  0.6164
Neighborhood              11.0  0.7534
Heating                   14.0  0.9589
BedroomAbvGr_str          14.0  0.9589
Condition1                15.0  1.0274
Condition2                15.0  1.0274
RoofMatl                  15.0  1.0274
Exterior2nd               17.0  1.1644
3SsnPorch_str             24.0  1.6438
LowQualFinSF_str          26.0  1.7808
SaleType                  28.0  1.9178
MiscVal_str               41.0  2.8082

-------------------------

f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1151   78.8356
Least frequent                10    0.6849
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent                829   56.7808
Least frequent               631   43.2192
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                925   63.3562
Least frequent                10    0.6849
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1311   89.7945
Least frequent                36    2.4658
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1056   72.3288
Least frequent                47    3.2192
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1382   94.6575
Least frequent                13    0.8904
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       24    1.6438
Len                         1460       NaN
NA                             0    0.0000
Most frequent                225   15.4110
Least frequent                11    0.7534
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        7    0.4795
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1260   86.3014
Least frequent                11    0.7534
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1445   98.9726
Least frequent                15    1.0274
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1220   83.5616
Least frequent                31    2.1233
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        8    0.5479
Len                         1460       NaN
NA                             0    0.0000
Most frequent                726   49.7260
Least frequent                 8    0.5479
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1141   78.1507
Least frequent                 9    0.6164
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1434   98.2192
Least frequent                11    0.7534
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       10    0.6849
Len                         1460       NaN
NA                             0    0.0000
Most frequent                522   35.7534
Least frequent                20    1.3699
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       12    0.8219
Len                         1460       NaN
NA                             0    0.0000
Most frequent                504   34.5205
Least frequent                10    0.6849
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                864   59.1781
Least frequent                15    1.0274
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                906   62.0548
Least frequent                14    0.9589
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1286   88.0822
Least frequent                28    1.9178
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent                647   44.3151
Least frequent                 9    0.6164
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                663   45.4110
Least frequent                52    3.5616
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1335   91.4384
Least frequent                60    4.1096
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                955   65.4110
Least frequent               116    7.9452
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        6    0.4110
Len                         1460       NaN
NA                             0    0.0000
Most frequent                467   31.9863
Least frequent                74    5.0685
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        6    0.4110
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1293   88.5616
Least frequent                14    0.9589
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1428   97.8082
Least frequent                14    0.9589
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                742   50.8219
Least frequent                49    3.3562
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1365   93.4932
Least frequent                95    6.5068
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1339   91.7123
Least frequent                27    1.8493
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                735   50.3425
Least frequent                39    2.6712
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1366   93.5616
Least frequent                14    0.9589
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent                660   45.2055
Least frequent                39    2.6712
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent                909   62.2603
Least frequent                11    0.7534
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent                676   46.3014
Least frequent               354   24.2466
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1331   91.1644
Least frequent                15    1.0274
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1376   94.2466
Least frequent                10    0.6849
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1340   91.7808
Least frequent                30    2.0548
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts    Ratio
Count                       1460  100.000
Nunique                        2    0.137
Len                         1460      NaN
NA                             0    0.000
Most frequent                870   59.589
Least frequent               590   40.411
Values occured only once       0    0.000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                594   40.6849
Least frequent                38    2.6027
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent                890   60.9589
Least frequent                 9    0.6164
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1267   86.7808
Least frequent                28    1.9178
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1202   82.3288
Least frequent                12    0.8219
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1201   82.2603
Least frequent               259   17.7397
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1369   93.7671
Least frequent                91    6.2329
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================



-------------------------

This Columns is duplicate of <MasVnrArea_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1452   99.4521
Least frequent                 8    0.5479
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================



-------------------------

This Columns is duplicate of <MasVnrType_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1452   99.4521
Least frequent                 8    0.5479
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================



-------------------------

This Columns is duplicate of <BsmtCond_NA_indicator> column

-------------------------

This Columns is duplicate of <BsmtFinType1_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1423   97.4658
Least frequent                37    2.5342
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================



-------------------------

This Columns is duplicate of <BsmtQual_NA_indicator> column

-------------------------

This Columns is duplicate of <BsmtFinType1_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1423   97.4658
Least frequent                37    2.5342
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1422   97.3973
Least frequent                38    2.6027
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================



-------------------------

This Columns is duplicate of <BsmtQual_NA_indicator> column

-------------------------

This Columns is duplicate of <BsmtCond_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1423   97.4658
Least frequent                37    2.5342
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1422   97.3973
Least frequent                38    2.6027
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent                770   52.7397
Least frequent               690   47.2603
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================



-------------------------

This Columns is duplicate of <GarageYrBlt_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageFinish_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageQual_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageCond_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1379   94.4521
Least frequent                81    5.5479
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================



-------------------------

This Columns is duplicate of <GarageType_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageFinish_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageQual_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageCond_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1379   94.4521
Least frequent                81    5.5479
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================



-------------------------

This Columns is duplicate of <GarageType_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageYrBlt_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageQual_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageCond_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1379   94.4521
Least frequent                81    5.5479
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================



-------------------------

This Columns is duplicate of <GarageType_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageYrBlt_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageFinish_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageCond_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1379   94.4521
Least frequent                81    5.5479
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================



-------------------------

This Columns is duplicate of <GarageType_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageYrBlt_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageFinish_NA_indicator> column

-------------------------

This Columns is duplicate of <GarageQual_NA_indicator> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1379   94.4521
Least frequent                81    5.5479
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1179   80.7534
Least frequent               281   19.2466
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1406   96.3014
Least frequent                54    3.6986
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       14    0.9589
Len                         1460       NaN
NA                             0    0.0000
Most frequent                540   36.9863
Least frequent                10    0.6849
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        8    0.5479
Len                         1460       NaN
NA                             0    0.0000
Most frequent                402   27.5342
Least frequent                18    1.2329
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        7    0.4795
Len                         1460       NaN
NA                             0    0.0000
Most frequent                827   56.6438
Least frequent                22    1.5068
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1434   98.2192
Least frequent                26    1.7808
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent                857   58.6986
Least frequent                15    1.0274
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1380   94.5205
Least frequent                80    5.4795
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================



-------------------------

This Columns is duplicate of <FullBath> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                768   52.6027
Least frequent                 9    0.6164
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================



-------------------------

This Columns is duplicate of <HalfBath> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent                913   62.5342
Least frequent                12    0.8219
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        6    0.4110
Len                         1460       NaN
NA                             0    0.0000
Most frequent                804   55.0685
Least frequent                14    0.9589
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1395   95.5479
Least frequent                65    4.4521
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       10    0.6849
Len                         1460       NaN
NA                             0    0.0000
Most frequent                404   27.6712
Least frequent                11    0.7534
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent                695   47.6027
Least frequent               115    7.8767
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        4    0.2740
Len                         1460       NaN
NA                             0    0.0000
Most frequent                829   56.7808
Least frequent                81    5.5479
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        2    0.1370
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1436   98.3562
Least frequent                24    1.6438
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        3    0.2055
Len                         1460       NaN
NA                             0    0.0000
Most frequent               1408   96.4384
Least frequent                11    0.7534
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================



-------------------------

This Columns is duplicate of <MoSold> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                       12    0.8219
Len                         1460       NaN
NA                             0    0.0000
Most frequent                253   17.3288
Least frequent                52    3.5616
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================



-------------------------

This Columns is duplicate of <YrSold> column
Column Type     : Object

-------------------------

                          Counts     Ratio
Count                       1460  100.0000
Nunique                        5    0.3425
Len                         1460       NaN
NA                             0    0.0000
Most frequent                338   23.1507
Least frequent               175   11.9863
Values occured only once       0    0.0000
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.4076567471495591)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

           Count   Ratio
Count     1460.0  100.00
NA           0.0    0.00
Mean        56.9     NaN
Std         42.3     NaN
Min         20.0     NaN
25%         20.0     NaN
50%         50.0     NaN
75%         70.0     NaN
Max        190.0     NaN
Nunique     15.0    1.03
Outlies     30.0    2.05
Nagetive     0.0    0.00
Zeros        0.0    0.00
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 2.0120008521763144)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        70.75     NaN
Std         23.47     NaN
Min         21.00     NaN
25%         60.00     NaN
50%         70.00     NaN
75%         80.00     NaN
Max        313.00     NaN
Nunique    224.00   15.34
Outlies     14.00    0.96
Nagetive     0.00    0.00
Zeros        0.00    0.00
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 12.207687851233496)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

              Count   Ratio
Count       1460.00  100.00
NA             0.00    0.00
Mean       10516.83     NaN
Std         9981.26     NaN
Min         1300.00     NaN
25%         7553.50     NaN
50%         9478.50     NaN
75%        11601.50     NaN
Max       215245.00     NaN
Nunique     1073.00   73.49
Outlies       13.00    0.89
Nagetive       0.00    0.00
Zeros          0.00    0.00
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.2169439277628693)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         6.10     NaN
Std          1.38     NaN
Min          1.00     NaN
25%          5.00     NaN
50%          6.00     NaN
75%          7.00     NaN
Max         10.00     NaN
Nunique     10.00    0.68
Outlies      2.00    0.14
Nagetive     0.00    0.00
Zeros        0.00    0.00
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.6930674724842182)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         5.58     NaN
Std          1.11     NaN
Min          1.00     NaN
25%          5.00     NaN
50%          5.00     NaN
75%          6.00     NaN
Max          9.00     NaN
Nunique      9.00    0.62
Outlies     28.00    1.92
Nagetive     0.00    0.00
Zeros        0.00    0.00
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: -0.613461172488183)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1971.27     NaN
Std         30.20     NaN
Min       1872.00     NaN
25%       1954.00     NaN
50%       1973.00     NaN
75%       2000.00     NaN
Max       2010.00     NaN
Nunique    112.00    7.67
Outlies      6.00    0.41
Nagetive     0.00    0.00
Zeros        0.00    0.00
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: -0.5035620027004709)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1984.87     NaN
Std         20.65     NaN
Min       1950.00     NaN
25%       1967.00     NaN
50%       1994.00     NaN
75%       2004.00     NaN
Max       2010.00     NaN
Nunique     61.00    4.18
Outlies      0.00    0.00
Nagetive     0.00    0.00
Zeros        0.00    0.00
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 2.6682455485578593)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean       103.84     NaN
Std        180.74     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%        166.00     NaN
Max       1600.00     NaN
Nunique    335.00   22.95
Outlies     32.00    2.19
Nagetive     0.00    0.00
Zeros      861.00   58.97
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.685503071910789)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean       443.64     NaN
Std        456.10     NaN
Min          0.00     NaN
25%          0.00     NaN
50%        383.50     NaN
75%        712.25     NaN
Max       5644.00     NaN
Nunique    637.00   43.63
Outlies      6.00    0.41
Nagetive     0.00    0.00
Zeros      467.00   31.99
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 4.255261108933303)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        46.55     NaN
Std        161.32     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max       1474.00     NaN
Nunique    144.00    9.86
Outlies     50.00    3.42
Nagetive     0.00    0.00
Zeros     1293.00   88.56
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.9202684528039037)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean       567.24     NaN
Std        441.87     NaN
Min          0.00     NaN
25%        223.00     NaN
50%        477.50     NaN
75%        808.00     NaN
Max       2336.00     NaN
Nunique    780.00   53.42
Outlies     11.00    0.75
Nagetive     0.00    0.00
Zeros      118.00    8.08
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.5242545490627664)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1057.43     NaN
Std        438.71     NaN
Min          0.00     NaN
25%        795.75     NaN
50%        991.50     NaN
75%       1298.25     NaN
Max       6110.00     NaN
Nunique    721.00   49.38
Outlies     10.00    0.68
Nagetive     0.00    0.00
Zeros       37.00    2.53
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.3767566220336365)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1162.63     NaN
Std        386.59     NaN
Min        334.00     NaN
25%        882.00     NaN
50%       1087.00     NaN
75%       1391.25     NaN
Max       4692.00     NaN
Nunique    753.00   51.58
Outlies     12.00    0.82
Nagetive     0.00    0.00
Zeros        0.00    0.00
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.8130298163023265)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean       346.99     NaN
Std        436.53     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%        728.00     NaN
Max       2065.00     NaN
Nunique    417.00   28.56
Outlies      4.00    0.27
Nagetive     0.00    0.00
Zeros      829.00   56.78
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 9.011341288465387)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         5.84     NaN
Std         48.62     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max        572.00     NaN
Nunique     24.00    1.64
Outlies     20.00    1.37
Nagetive     0.00    0.00
Zeros     1434.00   98.22
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.3665603560164552)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1515.46     NaN
Std        525.48     NaN
Min        334.00     NaN
25%       1129.50     NaN
50%       1464.00     NaN
75%       1776.75     NaN
Max       5642.00     NaN
Nunique    861.00   58.97
Outlies     16.00    1.10
Nagetive     0.00    0.00
Zeros        0.00    0.00
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.596066609663168)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         0.43     NaN
Std          0.52     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          1.00     NaN
Max          3.00     NaN
Nunique      4.00    0.27
Outlies     16.00    1.10
Nagetive     0.00    0.00
Zeros      856.00   58.63
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 4.103402697955168)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         0.06     NaN
Std          0.24     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max          2.00     NaN
Nunique      3.00    0.21
Outlies     82.00    5.62
Nagetive     0.00    0.00
Zeros     1378.00   94.38
f


========================================= {column_name} =========================================



-------------------------

This Columns is duplicate of <FullBath_str> column
Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.036561558402727165)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         1.57     NaN
Std          0.55     NaN
Min          0.00     NaN
25%          1.00     NaN
50%          2.00     NaN
75%          2.00     NaN
Max          3.00     NaN
Nunique      4.00    0.27
Outlies      0.00    0.00
Nagetive     0.00    0.00
Zeros        9.00    0.62
f


========================================= {column_name} =========================================



-------------------------

This Columns is duplicate of <HalfBath_str> column
Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.675897448233722)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         0.38     NaN
Std          0.50     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          1.00     NaN
Max          2.00     NaN
Nunique      3.00    0.21
Outlies     12.00    0.82
Nagetive     0.00    0.00
Zeros      913.00   62.53
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.21179009627507137)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         2.87     NaN
Std          0.82     NaN
Min          0.00     NaN
25%          2.00     NaN
50%          3.00     NaN
75%          3.00     NaN
Max          8.00     NaN
Nunique      8.00    0.55
Outlies     14.00    0.96
Nagetive     0.00    0.00
Zeros        6.00    0.41
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 4.488396777072859)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         1.05     NaN
Std          0.22     NaN
Min          0.00     NaN
25%          1.00     NaN
50%          1.00     NaN
75%          1.00     NaN
Max          3.00     NaN
Nunique      4.00    0.27
Outlies     68.00    4.66
Nagetive     0.00    0.00
Zeros        1.00    0.07
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.6763408364355531)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         6.52     NaN
Std          1.63     NaN
Min          2.00     NaN
25%          5.00     NaN
50%          6.00     NaN
75%          7.00     NaN
Max         14.00     NaN
Nunique     12.00    0.82
Outlies     12.00    0.82
Nagetive     0.00    0.00
Zeros        0.00    0.00
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: 0.6495651830548841)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         0.61     NaN
Std          0.64     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          1.00     NaN
75%          1.00     NaN
Max          3.00     NaN
Nunique      4.00    0.27
Outlies      5.00    0.34
Nagetive     0.00    0.00
Zeros      690.00   47.26
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are moderately skewed (skewness is: -0.541264504372725)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      1977.23     NaN
Std         24.78     NaN
Min       1900.00     NaN
25%       1960.00     NaN
50%       1978.00     NaN
75%       2001.00     NaN
Max       2010.00     NaN
Nunique    148.00   10.14
Outlies      1.00    0.07
Nagetive     0.00    0.00
Zeros        0.00    0.00
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: -0.3425489297486655)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         1.77     NaN
Std          0.75     NaN
Min          0.00     NaN
25%          1.00     NaN
50%          2.00     NaN
75%          2.00     NaN
Max          4.00     NaN
Nunique      5.00    0.34
Outlies      0.00    0.00
Nagetive     0.00    0.00
Zeros       81.00    5.55
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.17998090674623907)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean       472.98     NaN
Std        213.80     NaN
Min          0.00     NaN
25%        334.50     NaN
50%        480.00     NaN
75%        576.00     NaN
Max       1418.00     NaN
Nunique    441.00   30.21
Outlies      7.00    0.48
Nagetive     0.00    0.00
Zeros       81.00    5.55
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.5413757571931312)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        94.24     NaN
Std        125.34     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%        168.00     NaN
Max        857.00     NaN
Nunique    274.00   18.77
Outlies     22.00    1.51
Nagetive     0.00    0.00
Zeros      761.00   52.12
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 2.3643417403694404)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        46.66     NaN
Std         66.26     NaN
Min          0.00     NaN
25%          0.00     NaN
50%         25.00     NaN
75%         68.00     NaN
Max        547.00     NaN
Nunique    202.00   13.84
Outlies     27.00    1.85
Nagetive     0.00    0.00
Zeros      656.00   44.93
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 3.08987190371177)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        21.95     NaN
Std         61.12     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max        552.00     NaN
Nunique    120.00    8.22
Outlies     51.00    3.49
Nagetive     0.00    0.00
Zeros     1252.00   85.75
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 10.304342032693112)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         3.41     NaN
Std         29.32     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max        508.00     NaN
Nunique     20.00    1.37
Outlies     23.00    1.58
Nagetive     0.00    0.00
Zeros     1436.00   98.36
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 4.122213743143115)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean        15.06     NaN
Std         55.76     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max        480.00     NaN
Nunique     76.00    5.21
Outlies     55.00    3.77
Nagetive     0.00    0.00
Zeros     1344.00   92.05
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 14.828373640750588)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         2.76     NaN
Std         40.18     NaN
Min          0.00     NaN
25%          0.00     NaN
50%          0.00     NaN
75%          0.00     NaN
Max        738.00     NaN
Nunique      8.00    0.55
Outlies      7.00    0.48
Nagetive     0.00    0.00
Zeros     1453.00   99.52
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 24.476794188821916)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

             Count   Ratio
Count      1460.00  100.00
NA            0.00    0.00
Mean         43.49     NaN
Std         496.12     NaN
Min           0.00     NaN
25%           0.00     NaN
50%           0.00     NaN
75%           0.00     NaN
Max       15500.00     NaN
Nunique      21.00    1.44
Outlies       8.00    0.55
Nagetive      0.00    0.00
Zeros      1408.00   96.44
f


========================================= {column_name} =========================================



-------------------------

This Columns is duplicate of <MoSold_str> column
Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.21205298505146022)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean         6.32     NaN
Std          2.70     NaN
Min          1.00     NaN
25%          5.00     NaN
50%          6.00     NaN
75%          8.00     NaN
Max         12.00     NaN
Nunique     12.00    0.82
Outlies      0.00    0.00
Nagetive     0.00    0.00
Zeros        0.00    0.00
f


========================================= {column_name} =========================================



-------------------------

This Columns is duplicate of <YrSold_str> column
Column Type     : Number

-------------------------

-------------------------

The data is fairly symmetrical (skewness is: 0.09626851386568028)

-------------------------

            Count   Ratio
Count     1460.00  100.00
NA           0.00    0.00
Mean      2007.82     NaN
Std          1.33     NaN
Min       2006.00     NaN
25%       2007.00     NaN
50%       2008.00     NaN
75%       2009.00     NaN
Max       2010.00     NaN
Nunique      5.00    0.34
Outlies      0.00    0.00
Nagetive     0.00    0.00
Zeros        0.00    0.00
f


========================================= {column_name} =========================================


Column Type     : Number

-------------------------

-------------------------

The data are highly skewed (skewness is: 1.8828757597682129)
Note: When skewness exceed |1| we called it highly skewed

-------------------------

             Count   Ratio
Count       1460.0  100.00
NA             0.0    0.00
Mean      180921.2     NaN
Std        79442.5     NaN
Min        34900.0     NaN
25%       129975.0     NaN
50%       163000.0     NaN
75%       214000.0     NaN
Max       755000.0     NaN
Nunique      663.0   45.41
Outlies       22.0    1.51
Nagetive       0.0    0.00
Zeros          0.0    0.00


----------------------------------------------------------------------------------------------
****************************************** Modeling ******************************************

-------------------- This is Regression problem --------------------

''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

 ------------------------------------- Linear Regression -------------------------------------


-------------------------

NOTE: This summary saved as <summary_OLS_1.csv>

-------------------------

                                   Variable         coef     std err      t  P>|t|      [0.025      0.975] Indicator
0                                  1stFlrSF      -4.3413      12.481 -0.348  0.728     -28.839      20.157          
1                                  2ndFlrSF       5.8851      12.053  0.488  0.625     -17.773      29.543          
2                                 3SsnPorch      37.2694      61.822  0.603  0.547     -84.076     158.615          
3                      3SsnPorch_str__"0.0"   -2079.8137    7551.898 -0.275  0.783  -16900.000   12700.000          
4                 3SsnPorch_str__Rare cases    2074.9696    7551.879  0.275  0.784  -12700.000   16900.000          
5               Alley_NA_indicator__Missing    3495.5872    2376.485  1.471  0.142   -1169.064    8160.239          
6           Alley_NA_indicator__Not missing   -3500.4314    2376.529 -1.473  0.141   -8165.170    1164.307          
7                               Alley__Grvl   -1259.8442    1572.476 -0.801  0.423   -4346.357    1826.669          
8                               Alley__Pave    1255.0001    1572.245  0.798  0.425   -1831.059    4341.059          
9                              BedroomAbvGr   -1310.4123    4435.130 -0.295  0.768  -10000.000    7395.024          
10                  BedroomAbvGr_str__"1.0"    5672.4422   10400.000  0.548  0.584  -14700.000   26000.000          
11                  BedroomAbvGr_str__"2.0"    1798.4046    6077.876  0.296  0.767  -10100.000   13700.000          
12                  BedroomAbvGr_str__"3.0"   -2616.3816    3408.673 -0.768  0.443   -9307.049    4074.286          
13                  BedroomAbvGr_str__"4.0"    2691.6555    4775.554  0.564  0.573   -6681.976   12100.000          
14                  BedroomAbvGr_str__"5.0"  -11650.0000   10500.000 -1.109  0.268  -32300.000    8964.596          
15             BedroomAbvGr_str__Rare cases    4094.2179   10500.000  0.391  0.696  -16400.000   24600.000          
16                           BldgType__1Fam   -6570.7722   13500.000 -0.487  0.626  -33000.000   19900.000          
17                         BldgType__2fmCon    9358.5009   27300.000  0.342  0.732  -44300.000   63000.000          
18                         BldgType__Duplex   -9159.0609    6250.601 -1.465  0.143  -21400.000    3109.847          
19                          BldgType__Twnhs    1802.8162   14900.000  0.121  0.903  -27400.000   31000.000          
20                         BldgType__TwnhsE    4563.6718   13700.000  0.332  0.740  -22400.000   31500.000          
21           BsmtCond_NA_indicator__Missing   -3701.3730    4229.913 -0.875  0.382  -12000.000    4601.255          
22       BsmtCond_NA_indicator__Not missing    3696.5288    4229.779  0.874  0.382   -4605.836   12000.000          
23                             BsmtCond__Fa   -5347.3105    4084.021 -1.309  0.191  -13400.000    2668.955          
24                             BsmtCond__Gd    3030.9907    3450.088  0.879  0.380   -3740.968    9802.950          
25                             BsmtCond__TA    2311.4756    2478.842  0.932  0.351   -2554.086    7177.037          
26       BsmtExposure_NA_indicator__Missing   -3701.3730    4229.913 -0.875  0.382  -12000.000    4601.255          
27   BsmtExposure_NA_indicator__Not missing    3696.5288    4229.779  0.874  0.382   -4605.836   12000.000          
28                         BsmtExposure__Av   -6207.6649    2190.969 -2.833  0.005  -10500.000   -1907.151        **
29                         BsmtExposure__Gd   17660.0000    2984.114  5.917  0.000   11800.000   23500.000       ***
30                         BsmtExposure__Mn   -3695.9078    2705.070 -1.366  0.172   -9005.518    1613.702          
31                         BsmtExposure__No   -7758.9521    1861.821 -4.167  0.000  -11400.000   -4104.501       ***
32                               BsmtFinSF1       2.2682       3.832  0.592  0.554      -5.254       9.791          
33                               BsmtFinSF2       7.6862       8.343  0.921  0.357      -8.689      24.061          
34       BsmtFinType1_NA_indicator__Missing   -3701.3730    4229.913 -0.875  0.382  -12000.000    4601.255          
35   BsmtFinType1_NA_indicator__Not missing    3696.5288    4229.779  0.874  0.382   -4605.836   12000.000          
36                        BsmtFinType1__ALQ     298.1800    2456.050  0.121  0.903   -4522.644    5119.004          
37                        BsmtFinType1__BLQ    1895.4817    2756.897  0.688  0.492   -3515.856    7306.820          
38                        BsmtFinType1__GLQ    4342.6084    2648.201  1.640  0.101    -855.377    9540.594          
39                        BsmtFinType1__LwQ   -5220.3497    4048.687 -1.289  0.198  -13200.000    2726.562          
40                        BsmtFinType1__Rec   -1696.8173    3023.496 -0.561  0.575   -7631.446    4237.811          
41                        BsmtFinType1__Unf     376.0527    2777.093  0.135  0.892   -5074.928    5827.033          
42       BsmtFinType2_NA_indicator__Missing    6910.1842   14600.000  0.474  0.636  -21700.000   35500.000          
43   BsmtFinType2_NA_indicator__Not missing   -6915.0283   14600.000 -0.474  0.636  -35500.000   21700.000          
44                        BsmtFinType2__ALQ    3665.4667    6987.331  0.525  0.600  -10000.000   17400.000          
45                        BsmtFinType2__BLQ    2888.6273    5567.056  0.519  0.604   -8038.595   13800.000          
46                        BsmtFinType2__GLQ   -3756.6246    9105.309 -0.413  0.680  -21600.000   14100.000          
47                        BsmtFinType2__LwQ   -2871.1734    5283.429 -0.543  0.587  -13200.000    7499.334          
48                        BsmtFinType2__Rec   -4915.4802    4756.905 -1.033  0.302  -14300.000    4421.546          
49                        BsmtFinType2__Unf    4984.3400    5164.718  0.965  0.335   -5153.158   15100.000          
50                             BsmtFullBath   14820.0000   11700.000  1.270  0.205   -8094.779   37700.000          
51                  BsmtFullBath_str__"0.0"   16570.0000   12100.000  1.364  0.173   -7281.817   40400.000          
52                  BsmtFullBath_str__"1.0"    6780.8522    4060.562  1.670  0.095   -1189.367   14800.000         .
53                  BsmtFullBath_str__"2.0"  -23350.0000   13900.000 -1.684  0.093  -50600.000    3867.778         .
54                             BsmtHalfBath     -53.0447   19200.000 -0.003  0.998  -37700.000   37600.000          
55                  BsmtHalfBath_str__"0.0"   -3765.1222    9849.549 -0.382  0.702  -23100.000   15600.000          
56                  BsmtHalfBath_str__"1.0"    3760.2781    9849.521  0.382  0.703  -15600.000   23100.000          
57           BsmtQual_NA_indicator__Missing   -3701.3730    4229.913 -0.875  0.382  -12000.000    4601.255          
58       BsmtQual_NA_indicator__Not missing    3696.5288    4229.779  0.874  0.382   -4605.836   12000.000          
59                             BsmtQual__Ex   12650.0000    4262.020  2.968  0.003    4285.840   21000.000        **
60                             BsmtQual__Fa   -3987.1200    5118.332 -0.779  0.436  -14000.000    6059.329          
61                             BsmtQual__Gd   -4065.7863    2610.048 -1.558  0.120   -9188.884    1057.312          
62                             BsmtQual__TA   -4603.4274    2633.496 -1.748  0.081   -9772.551     565.696         .
63                                BsmtUnfSF      -2.2817       4.677 -0.488  0.626     -11.462       6.899          
64                            CentralAir__N    -912.8214    2737.458 -0.333  0.739   -6286.004    4460.362          
65                            CentralAir__Y     907.9772    2737.455  0.332  0.740   -4465.199    6281.153          
66                       Condition1__Artery    -961.7133    5707.105 -0.169  0.866  -12200.000   10200.000          
67                        Condition1__Feedr   -3844.9486    4237.316 -0.907  0.364  -12200.000    4472.212          
68                         Condition1__Norm    6771.8850    2816.167  2.405  0.016    1244.210   12300.000         *
69                         Condition1__PosN  -13940.0000    7854.404 -1.775  0.076  -29400.000    1472.366         .
70                         Condition1__RRAe  -18500.0000    8460.254 -2.187  0.029  -35100.000   -1893.172         *
71                         Condition1__RRAn   16410.0000    6147.979  2.669  0.008    4343.348   28500.000        **
72                   Condition1__Rare cases   14060.0000    7799.400  1.803  0.072   -1246.027   29400.000         .
73                         Condition2__Norm   22250.0000    5447.705  4.085  0.000   11600.000   32900.000       ***
74                   Condition2__Rare cases  -22260.0000    5447.824 -4.085  0.000  -32900.000  -11600.000       ***
75                        Electrical__FuseA   -1759.5466    3478.963 -0.506  0.613   -8588.182    5069.089          
76                        Electrical__FuseF    6884.2751    5210.653  1.321  0.187   -3343.385   17100.000          
77                        Electrical__SBrkr   -5129.5727    3263.721 -1.572  0.116  -11500.000    1276.579          
78                            EnclosedPorch      31.1905      17.278  1.805  0.071      -2.724      65.104         .
79                            ExterCond__Fa    1717.2756    5284.605  0.325  0.745   -8655.542   12100.000          
80                            ExterCond__Gd   -2627.5526    3380.791 -0.777  0.437   -9263.492    4008.387          
81                            ExterCond__TA     905.4329    2830.684  0.320  0.749   -4650.736    6461.602          
82                            ExterQual__Ex   13300.0000    6988.705  1.903  0.057    -415.524   27000.000         .
83                            ExterQual__Fa     -25.9986   11100.000 -0.002  0.998  -21800.000   21800.000          
84                            ExterQual__Gd   -5271.3592    4462.546 -1.181  0.238  -14000.000    3487.891          
85                            ExterQual__TA   -8009.6483    4306.193 -1.860  0.063  -16500.000     442.705         .
86                     Exterior1st__AsbShng   19440.0000   15600.000  1.242  0.214  -11300.000   50200.000          
87                     Exterior1st__BrkFace   11630.0000    7446.038  1.562  0.119   -2987.712   26200.000          
88                     Exterior1st__CemntBd  -20140.0000   19500.000 -1.035  0.301  -58300.000   18000.000          
89                     Exterior1st__HdBoard    4624.9870    6189.838  0.747  0.455   -7524.654   16800.000          
90                     Exterior1st__MetalSd   -3432.5884    9580.418 -0.358  0.720  -22200.000   15400.000          
91                     Exterior1st__Plywood    9235.7424    6299.837  1.466  0.143   -3129.808   21600.000          
92                      Exterior1st__Stucco  -20670.0000   11900.000 -1.732  0.084  -44100.000    2755.309         .
93                     Exterior1st__VinylSd   -2391.7225    7648.713 -0.313  0.755  -17400.000   12600.000          
94                     Exterior1st__Wd Sdng   -2117.8927    5784.309 -0.366  0.714  -13500.000    9235.760          
95                     Exterior1st__WdShing    3813.3503    8581.103  0.444  0.657  -13000.000   20700.000          
96                     Exterior2nd__AsbShng  -19320.0000   15500.000 -1.243  0.214  -49800.000   11200.000          
97                     Exterior2nd__BrkFace     445.0308    9143.817  0.049  0.961  -17500.000   18400.000          
98                     Exterior2nd__CmentBd   17590.0000   20300.000  0.866  0.387  -22300.000   57400.000          
99                     Exterior2nd__HdBoard   -2736.2205    6022.968 -0.454  0.650  -14600.000    9085.882          
100                    Exterior2nd__ImStucc  -14130.0000    9621.497 -1.469  0.142  -33000.000    4755.580          
101                    Exterior2nd__MetalSd    1690.7938    9899.988  0.171  0.864  -17700.000   21100.000          
102                    Exterior2nd__Plywood   -4016.3164    5575.661 -0.720  0.472  -15000.000    6927.795          
103                 Exterior2nd__Rare cases   -6568.0191    9745.793 -0.674  0.501  -25700.000   12600.000          
104                     Exterior2nd__Stucco   18070.0000   12100.000  1.491  0.136   -5720.569   41900.000          
105                    Exterior2nd__VinylSd    7464.7050    7210.296  1.035  0.301   -6687.928   21600.000          
106                    Exterior2nd__Wd Sdng    4530.7551    5654.631  0.801  0.423   -6568.362   15600.000          
107                    Exterior2nd__Wd Shng   -3025.2082    7322.541 -0.413  0.680  -17400.000   11300.000          
108             Fence_NA_indicator__Missing   -1043.1026    1231.793 -0.847  0.397   -3460.912    1374.707          
109         Fence_NA_indicator__Not missing    1038.2585    1231.867  0.843  0.400   -1379.696    3456.212          
110                            Fence__GdPrv    -281.0904    2269.067 -0.124  0.901   -4734.898    4172.717          
111                             Fence__GdWo    -226.8164    2262.290 -0.100  0.920   -4667.321    4213.689          
112                            Fence__MnPrv    1298.0768    2043.669  0.635  0.525   -2713.313    5309.466          
113                             Fence__MnWw    -795.0142    4630.464 -0.172  0.864   -9883.859    8293.831          
114       FireplaceQu_NA_indicator__Missing    -558.8191     476.744 -1.172  0.241   -1494.591     376.952          
115   FireplaceQu_NA_indicator__Not missing     553.9750     476.620  1.162  0.245    -381.552    1489.502          
116                         FireplaceQu__Ex    5391.1276    5134.033  1.050  0.294   -4686.141   15500.000          
117                         FireplaceQu__Fa    -300.1210    2956.550 -0.102  0.919   -6103.345    5503.103          
118                         FireplaceQu__Gd   -1306.5021    2067.934 -0.632  0.528   -5365.519    2752.515          
119                         FireplaceQu__Po    -206.9138    3601.468 -0.057  0.954   -7276.007    6862.179          
120                         FireplaceQu__TA   -3582.4348    2144.271 -1.671  0.095   -7791.288     626.419         .
121                              Fireplaces  -17430.0000    5393.602 -3.231  0.001  -28000.000   -6840.035        **
122                   Fireplaces_str__"0.0"  -22070.0000    5403.495 -4.085  0.000  -32700.000  -11500.000       ***
123                   Fireplaces_str__"1.0"   -2977.0931    1411.126 -2.110  0.035   -5746.903    -207.283         *
124                   Fireplaces_str__"2.0"   25040.0000    5722.919  4.376  0.000   13800.000   36300.000       ***
125                      Foundation__BrkTil     -21.9775    4891.709 -0.004  0.996   -9623.603    9579.648          
126                      Foundation__CBlock    4676.1991    3911.212  1.196  0.232   -3000.871   12400.000          
127                       Foundation__PConc    6893.7130    4085.783  1.687  0.092   -1126.012   14900.000         .
128                  Foundation__Rare cases  -22360.0000   10400.000 -2.158  0.031  -42700.000   -2022.010         *
129                        Foundation__Slab   10810.0000   10000.000  1.076  0.282   -8902.879   30500.000          
130                                FullBath    7655.7680    5728.657  1.336  0.182   -3588.650   18900.000          
131                     FullBath_str__"1.0"   -6310.2331    7805.781 -0.808  0.419  -21600.000    9011.240          
132                     FullBath_str__"2.0"   -7587.7706    3964.744 -1.914  0.056  -15400.000     194.374         .
133                     FullBath_str__"3.0"    9713.8474    3789.400  2.563  0.011    2275.874   17200.000         *
134                FullBath_str__Rare cases    4179.3121    7797.121  0.536  0.592  -11100.000   19500.000          
135                        Functional__Maj1  -12040.0000    7937.842 -1.516  0.130  -27600.000    3545.128          
136                        Functional__Min1    4978.5878    6240.879  0.798  0.425   -7271.238   17200.000          
137                        Functional__Min2    4900.9551    5480.331  0.894  0.371   -5856.039   15700.000          
138                         Functional__Mod   -9515.1811    8057.106 -1.181  0.238  -25300.000    6299.602          
139                         Functional__Typ   11670.0000    3592.836  3.247  0.001    4614.204   18700.000        **
140                              GarageArea      11.3343      10.806  1.049  0.295      -9.876      32.544          
141                              GarageCars    7118.5604    8957.390  0.795  0.427  -10500.000   24700.000          
142                   GarageCars_str__"0.0"     680.4035    1646.418  0.413  0.680   -2551.246    3912.053          
143                   GarageCars_str__"1.0"   -1873.6670    8690.287 -0.216  0.829  -18900.000   15200.000          
144                   GarageCars_str__"2.0"   -4667.6933    1772.594 -2.633  0.009   -8147.006   -1188.380        **
145                   GarageCars_str__"3.0"    5856.1126    9739.402  0.601  0.548  -13300.000   25000.000          
146        GarageCond_NA_indicator__Missing     680.4035    1646.418  0.413  0.680   -2551.246    3912.053          
147    GarageCond_NA_indicator__Not missing    -685.2476    1646.464 -0.416  0.677   -3916.988    2546.493          
148                          GarageCond__Fa    -133.7145    6514.937 -0.021  0.984  -12900.000   12700.000          
149                          GarageCond__Gd   -3771.7805    9538.061 -0.395  0.693  -22500.000   14900.000          
150                          GarageCond__Po     339.2597   13600.000  0.025  0.980  -26400.000   27100.000          
151                          GarageCond__TA    3561.3912    5715.355  0.623  0.533   -7656.916   14800.000          
152      GarageFinish_NA_indicator__Missing     680.4035    1646.418  0.413  0.680   -2551.246    3912.053          
153  GarageFinish_NA_indicator__Not missing    -685.2476    1646.464 -0.416  0.677   -3916.988    2546.493          
154                       GarageFinish__Fin    3738.3343    1692.317  2.209  0.027     416.593    7060.075         *
155                       GarageFinish__RFn    -458.4012    1471.812 -0.311  0.756   -3347.327    2430.524          
156                       GarageFinish__Unf   -3284.7773    1792.833 -1.832  0.067   -6803.816     234.262         .
157        GarageQual_NA_indicator__Missing     680.4035    1646.418  0.413  0.680   -2551.246    3912.053          
158    GarageQual_NA_indicator__Not missing    -685.2476    1646.464 -0.416  0.677   -3916.988    2546.493          
159                          GarageQual__Fa   -5345.1643    5470.095 -0.977  0.329  -16100.000    5391.738          
160                          GarageQual__Gd    6651.4683    7537.120  0.882  0.378   -8142.668   21400.000          
161                          GarageQual__TA   -1311.1481    4107.718 -0.319  0.750   -9373.928    6751.631          
162        GarageType_NA_indicator__Missing     680.4035    1646.418  0.413  0.680   -2551.246    3912.053          
163    GarageType_NA_indicator__Not missing    -685.2476    1646.464 -0.416  0.677   -3916.988    2546.493          
164                      GarageType__Attchd   -2061.8625    3429.870 -0.601  0.548   -8794.137    4670.412          
165                     GarageType__Basment    3912.7792    7254.331  0.539  0.590  -10300.000   18200.000          
166                     GarageType__BuiltIn  -11090.0000    4816.018 -2.302  0.022  -20500.000   -1635.359         *
167                     GarageType__CarPort    6667.5872   10100.000  0.660  0.509  -13200.000   26500.000          
168                      GarageType__Detchd    2565.0667    3586.470  0.715  0.475   -4474.587    9604.721          
169                             GarageYrBlt     -78.1122      85.236 -0.916  0.360    -245.416      89.192          
170       GarageYrBlt_NA_indicator__Missing     680.4035    1646.418  0.413  0.680   -2551.246    3912.053          
171   GarageYrBlt_NA_indicator__Not missing    -685.2476    1646.464 -0.416  0.677   -3916.988    2546.493          
172                               GrLivArea      44.9795      12.304  3.656  0.000      20.828      69.131       ***
173                                HalfBath    1375.3034    4992.634  0.275  0.783   -8424.421   11200.000          
174                     HalfBath_str__"0.0"   -1618.7921    1027.159 -1.576  0.115   -3634.937     397.353          
175                     HalfBath_str__"1.0"    1852.5925    5065.087  0.366  0.715   -8089.346   11800.000          
176                     HalfBath_str__"2.0"    -238.6445    4922.919 -0.048  0.961   -9901.530    9424.241          
177                           HeatingQC__Ex     -85.4541    2301.320 -0.037  0.970   -4602.569    4431.660          
178                           HeatingQC__Fa    2121.0319    4549.793  0.466  0.641   -6809.468   11100.000          
179                           HeatingQC__Gd    -635.2931    2292.879 -0.277  0.782   -5135.840    3865.254          
180                           HeatingQC__TA   -1405.1288    2145.689 -0.655  0.513   -5616.767    2806.509          
181                           Heating__GasA    4150.6873    5768.730  0.720  0.472   -7172.387   15500.000          
182                           Heating__GasW    6590.8025    7041.075  0.936  0.350   -7229.678   20400.000          
183                     Heating__Rare cases  -10750.0000    8280.814 -1.298  0.195  -27000.000    5507.552          
184                      HouseStyle__1.5Fin   12340.0000   11400.000  1.081  0.280  -10100.000   34700.000          
185                      HouseStyle__1.5Unf   23660.0000   32500.000  0.728  0.467  -40100.000   87400.000          
186                      HouseStyle__1Story   18070.0000    9429.892  1.916  0.056    -442.597   36600.000         .
187                      HouseStyle__2.5Unf  -56030.0000   20100.000 -2.783  0.006  -95500.000  -16500.000        **
188                      HouseStyle__2Story   -3668.3682    9861.095 -0.372  0.710  -23000.000   15700.000          
189                  HouseStyle__Rare cases  -61360.0000   21300.000 -2.879  0.004 -103000.000  -19500.000        **
190                      HouseStyle__SFoyer   40870.0000   14400.000  2.832  0.005   12500.000   69200.000        **
191                        HouseStyle__SLvl   26120.0000   15000.000  1.744  0.082   -3277.895   55500.000         .
192                            KitchenAbvGr  -22560.0000   18400.000 -1.223  0.222  -58700.000   13600.000          
193                 KitchenAbvGr_str__"1.0"  -10480.0000   10300.000 -1.021  0.308  -30600.000    9663.654          
194                 KitchenAbvGr_str__"2.0"   10470.0000   10300.000  1.021  0.308   -9668.246   30600.000          
195                         KitchenQual__Ex   13530.0000    4286.411  3.156  0.002    5113.490   21900.000        **
196                         KitchenQual__Fa     167.9946    5120.826  0.033  0.974   -9883.351   10200.000          
197                         KitchenQual__Gd   -5490.9265    2529.325 -2.171  0.030  -10500.000    -526.275         *
198                         KitchenQual__TA   -8208.9276    2518.350 -3.260  0.001  -13200.000   -3265.819        **
199                        LandContour__Bnk   -4735.4850    4365.735 -1.085  0.278  -13300.000    3833.740          
200                        LandContour__HLS    4036.8129    4449.691  0.907  0.365   -4697.203   12800.000          
201                        LandContour__Low   -4146.6704    5589.500 -0.742  0.458  -15100.000    6824.606          
202                        LandContour__Lvl    4840.4984    3095.355  1.564  0.118   -1235.178   10900.000          
203                          LandSlope__Gtl    3531.4813    5587.739  0.632  0.528   -7436.337   14500.000          
204                          LandSlope__Mod    9833.7261    5331.768  1.844  0.065    -631.664   20300.000         .
205                          LandSlope__Sev  -13370.0000    9398.259 -1.423  0.155  -31800.000    5077.195          
206                                 LotArea       0.2382       0.157  1.521  0.129      -0.069       0.545          
207                       LotConfig__Corner    -782.7630    2341.935 -0.334  0.738   -5379.599    3814.073          
208                      LotConfig__CulDSac    8492.8091    3100.784  2.739  0.006    2406.476   14600.000        **
209                          LotConfig__FR2   -7914.6539    4067.475 -1.946  0.052  -15900.000      69.135         .
210                       LotConfig__Inside     199.7637    1991.184  0.100  0.920   -3708.605    4108.132          
211                             LotFrontage     -61.3684      59.958 -1.024  0.306    -179.057      56.320          
212       LotFrontage_NA_indicator__Missing     361.5045    1322.591  0.273  0.785   -2234.526    2957.535          
213   LotFrontage_NA_indicator__Not missing    -366.3486    1322.594 -0.277  0.782   -2962.385    2229.687          
214                           LotShape__IR1    -285.9946    3165.893 -0.090  0.928   -6500.126    5928.137          
215                           LotShape__IR2    5812.4145    4657.052  1.248  0.212   -3328.618   15000.000          
216                           LotShape__IR3   -6226.6686    7700.723 -0.809  0.419  -21300.000    8888.593          
217                           LotShape__Reg     695.4045    3328.262  0.209  0.835   -5837.431    7228.240          
218                            LowQualFinSF      43.4357      33.537  1.295  0.196     -22.393     109.264          
219                 LowQualFinSF_str__"0.0"    9482.7561    7380.387  1.285  0.199   -5003.738   24000.000          
220            LowQualFinSF_str__Rare cases   -9487.6002    7380.361 -1.286  0.199  -24000.000    4998.842          
221                              MSSubClass     493.8193     976.779  0.506  0.613   -1423.438    2411.076          
222                 MSSubClass_str__"120.0"  -35780.0000   33900.000 -1.055  0.292 -102000.000   30800.000          
223                 MSSubClass_str__"160.0"  -45970.0000   71200.000 -0.645  0.519 -186000.000   93900.000          
224                 MSSubClass_str__"180.0"  -84060.0000   91900.000 -0.915  0.360 -264000.000   96200.000          
225                 MSSubClass_str__"190.0"  -64190.0000  105000.000 -0.614  0.539 -269000.000  141000.000          
226                  MSSubClass_str__"20.0"   47480.0000   68700.000  0.691  0.490  -87400.000  182000.000          
227                  MSSubClass_str__"30.0"   33510.0000   60500.000  0.554  0.580  -85200.000  152000.000          
228                  MSSubClass_str__"45.0"   24510.0000   56700.000  0.432  0.666  -86700.000  136000.000          
229                  MSSubClass_str__"50.0"   23510.0000   40700.000  0.577  0.564  -56400.000  103000.000          
230                  MSSubClass_str__"60.0"   25700.0000   30300.000  0.847  0.397  -33800.000   85200.000          
231                  MSSubClass_str__"70.0"   26020.0000   22600.000  1.150  0.251  -18400.000   70400.000          
232                  MSSubClass_str__"75.0"   79400.0000   26500.000  3.001  0.003   27500.000  131000.000        **
233                  MSSubClass_str__"80.0"    3265.1889   17700.000  0.184  0.854  -31500.000   38000.000          
234                  MSSubClass_str__"85.0"  -24220.0000   15800.000 -1.538  0.125  -55100.000    6700.558          
235                  MSSubClass_str__"90.0"   -9159.0609    6250.601 -1.465  0.143  -21400.000    3109.847          
236                       MSZoning__C (all)   -8905.7206   13200.000 -0.673  0.501  -34900.000   17100.000          
237                            MSZoning__FV   15280.0000    8223.752  1.858  0.064    -861.201   31400.000         .
238                            MSZoning__RH   -5202.4775    8712.739 -0.597  0.551  -22300.000   11900.000          
239                            MSZoning__RL    -320.9281    4630.852 -0.069  0.945   -9410.534    8768.678          
240                            MSZoning__RM    -856.3985    5250.649 -0.163  0.870  -11200.000    9449.768          
241                              MasVnrArea      21.2898       7.569  2.813  0.005       6.433      36.147        **
242    MasVnrArea_NA_indicator__Not missing    -285.3145    2709.717 -0.105  0.916   -5604.047    5033.418          
243     MasVnrArea_NA_indicator__Rare cases     280.4703    2709.737  0.104  0.918   -5038.300    5599.241          
244    MasVnrType_NA_indicator__Not missing    -285.3145    2709.717 -0.105  0.916   -5604.047    5033.418          
245     MasVnrType_NA_indicator__Rare cases     280.4703    2709.737  0.104  0.918   -5038.300    5599.241          
246                      MasVnrType__BrkCmn   -6615.8020    6992.351 -0.946  0.344  -20300.000    7109.041          
247                     MasVnrType__BrkFace    -931.6397    2857.815 -0.326  0.745   -6541.063    4677.784          
248                        MasVnrType__None    5201.6034    3011.026  1.728  0.084    -708.549   11100.000         .
249                       MasVnrType__Stone    2340.9941    3564.531  0.657  0.512   -4655.597    9337.585          
250       MiscFeature_NA_indicator__Missing    1180.1962   17100.000  0.069  0.945  -32400.000   34800.000          
251   MiscFeature_NA_indicator__Not missing   -1185.0404   17100.000 -0.069  0.945  -34800.000   32400.000          
252                       MiscFeature__Othr  280400.0000   35400.000  7.922  0.000  211000.000  350000.000       ***
253                 MiscFeature__Rare cases -557700.0000   70500.000 -7.907  0.000 -696000.000 -419000.000       ***
254                       MiscFeature__Shed  277300.0000   35200.000  7.872  0.000  208000.000  346000.000       ***
255                                 MiscVal      12.1074      10.023  1.208  0.227      -7.566      31.781          
256                      MiscVal_str__"0.0"    3802.7136   23300.000  0.163  0.871  -42000.000   49600.000          
257                    MiscVal_str__"400.0"   -7832.2406   13600.000 -0.574  0.566  -34600.000   19000.000          
258                 MiscVal_str__Rare cases    4024.6830   13100.000  0.306  0.759  -21800.000   29800.000          
259                                  MoSold    -162.5809     352.020 -0.462  0.644    -853.538     528.376          
260                       MoSold_str__"1.0"     825.3061    3570.885  0.231  0.817   -6183.758    7834.370          
261                      MoSold_str__"10.0"   -6136.2362    3310.473 -1.854  0.064  -12600.000     361.682         .
262                      MoSold_str__"11.0"    2494.8557    3524.744  0.708  0.479   -4423.641    9413.352          
263                      MoSold_str__"12.0"     721.1909    3496.536  0.206  0.837   -6141.939    7584.321          
264                       MoSold_str__"2.0"   -7494.5687    3800.907 -1.972  0.049  -15000.000     -34.009         *
265                       MoSold_str__"3.0"    2719.6670    3037.271  0.895  0.371   -3242.000    8681.334          
266                       MoSold_str__"4.0"    3089.2057    2847.330  1.085  0.278   -2499.638    8678.049          
267                       MoSold_str__"5.0"     828.0756    2398.801  0.345  0.730   -3880.379    5536.530          
268                       MoSold_str__"6.0"    1984.3847    2203.429  0.901  0.368   -2340.587    6309.356          
269                       MoSold_str__"7.0"    4880.2269    2324.465  2.100  0.036     317.681    9442.773         *
270                       MoSold_str__"8.0"   -3794.3772    2965.946 -1.279  0.201   -9616.044    2027.289          
271                       MoSold_str__"9.0"    -122.5747    4143.337 -0.030  0.976   -8255.269    8010.120          
272                   Neighborhood__Blmngtn   -5914.0752    9519.548 -0.621  0.535  -24600.000   12800.000          
273                    Neighborhood__BrDale   -2432.6525   12000.000 -0.203  0.839  -26000.000   21100.000          
274                   Neighborhood__BrkSide    -706.0561    6392.617 -0.110  0.912  -13300.000   11800.000          
275                   Neighborhood__ClearCr    6665.2772    7588.666  0.878  0.380   -8230.034   21600.000          
276                   Neighborhood__CollgCr    2708.1320    4007.812  0.676  0.499   -5158.548   10600.000          
277                   Neighborhood__Crawfor   18230.0000    6061.059  3.008  0.003    6334.418   30100.000        **
278                   Neighborhood__Edwards  -22530.0000    4064.705 -5.543  0.000  -30500.000  -14600.000       ***
279                   Neighborhood__Gilbert    -821.7721    5166.817 -0.159  0.874  -11000.000    9319.845          
280                    Neighborhood__IDOTRR  -19280.0000    9037.441 -2.134  0.033  -37000.000   -1545.446         *
281                   Neighborhood__MeadowV   -9512.1907   12300.000 -0.774  0.439  -33600.000   14600.000          
282                   Neighborhood__Mitchel   -8044.7776    5181.962 -1.552  0.121  -18200.000    2126.567          
283                     Neighborhood__NAmes   -9016.9380    3602.958 -2.503  0.013  -16100.000   -1944.919         *
284                    Neighborhood__NWAmes   -7408.2526    4826.646 -1.535  0.125  -16900.000    2065.666          
285                   Neighborhood__NoRidge   37050.0000    6546.908  5.659  0.000   24200.000   49900.000       ***
286                   Neighborhood__NridgHt   26830.0000    5617.810  4.775  0.000   15800.000   37900.000       ***
287                   Neighborhood__OldTown  -16750.0000    6553.809 -2.555  0.011  -29600.000   -3881.914         *
288                Neighborhood__Rare cases     890.5270   12000.000  0.074  0.941  -22700.000   24400.000          
289                     Neighborhood__SWISU  -23890.0000    7993.583 -2.988  0.003  -39600.000   -8196.887        **
290                    Neighborhood__Sawyer   -4871.8407    4570.224 -1.066  0.287  -13800.000    4098.762          
291                   Neighborhood__SawyerW    3795.9915    4834.402  0.785  0.433   -5693.149   13300.000          
292                   Neighborhood__Somerst    1489.9423    7870.593  0.189  0.850  -14000.000   16900.000          
293                   Neighborhood__StoneBr   29320.0000    8768.744  3.344  0.001   12100.000   46500.000        **
294                    Neighborhood__Timber   -6188.9940    6859.789 -0.902  0.367  -19700.000    7275.651          
295                   Neighborhood__Veenker   10380.0000    9493.151  1.094  0.274   -8250.729   29000.000          
296                             OpenPorchSF       1.6354      16.398  0.100  0.921     -30.551      33.821          
297                             OverallCond    1497.5135    5795.275  0.258  0.796   -9877.665   12900.000          
298                  OverallCond_str__"3.0"  -10340.0000   18000.000 -0.574  0.566  -45700.000   25000.000          
299                  OverallCond_str__"4.0"  -10590.0000   12500.000 -0.848  0.397  -35100.000   13900.000          
300                  OverallCond_str__"5.0"   -1962.7515    6797.464 -0.289  0.773  -15300.000   11400.000          
301                  OverallCond_str__"6.0"    2373.2526    2950.940  0.804  0.421   -3418.961    8165.467          
302                  OverallCond_str__"7.0"    4477.0479    6465.453  0.692  0.489   -8213.581   17200.000          
303                  OverallCond_str__"8.0"    2590.3440   12500.000  0.208  0.835  -21900.000   27000.000          
304                  OverallCond_str__"9.0"   13440.0000   18700.000  0.718  0.473  -23300.000   50200.000          
305                             OverallQual    5601.9469    6462.640  0.867  0.386   -7083.159   18300.000          
306                 OverallQual_str__"10.0"    7465.6789   24700.000  0.302  0.763  -41100.000   56000.000          
307                  OverallQual_str__"3.0"   13810.0000   24100.000  0.573  0.567  -33500.000   61100.000          
308                  OverallQual_str__"4.0"   -5916.2937   16400.000 -0.360  0.719  -38100.000   26300.000          
309                  OverallQual_str__"5.0"  -10000.0000   10800.000 -0.930  0.352  -31100.000   11100.000          
310                  OverallQual_str__"6.0"  -11690.0000    5114.898 -2.286  0.022  -21700.000   -1654.867         *
311                  OverallQual_str__"7.0"   -8737.1629    4665.545 -1.873  0.061  -17900.000     420.539         .
312                  OverallQual_str__"8.0"    1787.5831   10100.000  0.177  0.859  -18000.000   21600.000          
313                  OverallQual_str__"9.0"   13280.0000   17500.000  0.760  0.447  -21000.000   47600.000          
314                           PavedDrive__N    1837.0093    3886.008  0.473  0.637   -5790.590    9464.608          
315                           PavedDrive__P   -1549.7854    4803.762 -0.323  0.747  -11000.000    7879.215          
316                           PavedDrive__Y    -292.0680    3120.131 -0.094  0.925   -6416.376    5832.240          
317                                PoolArea    1341.8090     175.691  7.637  0.000     996.956    1686.662       ***
318                              PoolQC__Fa     786.7950    1418.131  0.555  0.579   -1996.765    3570.355          
319                              PoolQC__Gd    -791.6392    1418.117 -0.558  0.577   -3575.170    1991.892          
320                       RoofMatl__CompShg     224.5666    7815.854  0.029  0.977  -15100.000   15600.000          
321                    RoofMatl__Rare cases   25620.0000    8106.762  3.161  0.002    9710.871   41500.000        **
322                       RoofMatl__Tar&Grv  -25850.0000   11800.000 -2.188  0.029  -49000.000   -2661.402         *
323                         RoofStyle__Flat   -1899.6956   14800.000 -0.129  0.898  -30900.000   27100.000          
324                        RoofStyle__Gable    2573.2021    5599.293  0.460  0.646   -8417.296   13600.000          
325                      RoofStyle__Gambrel   -9681.6685   11200.000 -0.868  0.386  -31600.000   12200.000          
326                          RoofStyle__Hip    2109.4958    5875.727  0.359  0.720   -9423.597   13600.000          
327                   RoofStyle__Rare cases    6893.8222   12500.000  0.553  0.580  -17600.000   31300.000          
328                  SaleCondition__Abnorml    2194.4422    5457.420  0.402  0.688   -8517.581   12900.000          
329                   SaleCondition__Alloca  -11350.0000   11200.000 -1.012  0.312  -33400.000   10700.000          
330                   SaleCondition__Family   -1574.9543    7369.950 -0.214  0.831  -16000.000   12900.000          
331                   SaleCondition__Normal    8828.3764    4684.507  1.885  0.060    -366.546   18000.000         .
332                  SaleCondition__Partial    1897.6217   14100.000  0.134  0.893  -25800.000   29600.000          
333                           SaleType__COD   -9845.4683    6847.459 -1.438  0.151  -23300.000    3594.976          
334                           SaleType__New   11530.0000   13300.000  0.868  0.386  -14500.000   37600.000          
335                    SaleType__Rare cases    5998.3642    6785.273  0.884  0.377   -7320.018   19300.000          
336                            SaleType__WD   -7691.3930    5267.584 -1.460  0.145  -18000.000    2648.013          
337                             ScreenPorch      63.8770      15.742  4.058  0.000      32.978      94.776       ***
338                            TotRmsAbvGrd    9411.1697    5112.907  1.841  0.066    -624.632   19400.000         .
339                TotRmsAbvGrd_str__"10.0"   -5932.6693   13200.000 -0.450  0.653  -31800.000   19900.000          
340                TotRmsAbvGrd_str__"11.0"  -45650.0000   18600.000 -2.448  0.015  -82200.000   -9048.481         *
341                TotRmsAbvGrd_str__"12.0"  -22610.0000   23800.000 -0.950  0.342  -69300.000   24100.000          
342                 TotRmsAbvGrd_str__"3.0"   28510.0000   23600.000  1.210  0.227  -17800.000   74800.000          
343                 TotRmsAbvGrd_str__"4.0"   23510.0000   17500.000  1.346  0.179  -10800.000   57800.000          
344                 TotRmsAbvGrd_str__"5.0"   15310.0000   12500.000  1.225  0.221   -9227.869   39900.000          
345                 TotRmsAbvGrd_str__"6.0"   11730.0000    7721.951  1.519  0.129   -3428.812   26900.000          
346                 TotRmsAbvGrd_str__"7.0"    5178.9634    3701.284  1.399  0.162   -2086.053   12400.000          
347                 TotRmsAbvGrd_str__"8.0"   -2351.4821    4316.603 -0.545  0.586  -10800.000    6121.304          
348                 TotRmsAbvGrd_str__"9.0"   -7706.4971    8632.447 -0.893  0.372  -24700.000    9237.586          
349                             TotalBsmtSF       7.6727       5.564  1.379  0.168      -3.248      18.594          
350                              WoodDeckSF      22.3136       8.225  2.713  0.007       6.170      38.457        **
351                               YearBuilt      36.1893     121.956  0.297  0.767    -203.191     275.569          
352                            YearRemodAdd     154.0372      78.419  1.964  0.050       0.114     307.960         .
353                                  YrSold    -299.4167     154.369 -1.940  0.053    -602.419       3.585         .
354                    YrSold_str__"2006.0"   -1461.1936    1805.449 -0.809  0.419   -5004.994    2082.607          
355                    YrSold_str__"2007.0"   -1503.7430    1759.694 -0.855  0.393   -4957.736    1950.250          
356                    YrSold_str__"2008.0"     637.4187    1817.045  0.351  0.726   -2929.143    4203.980          
357                    YrSold_str__"2009.0"    -356.1383    1751.333 -0.203  0.839   -3793.718    3081.442          
358                    YrSold_str__"2010.0"    2678.8120    2349.664  1.140  0.255   -1933.195    7290.819          

-------------------------

 --- Model statistic --- 

R-squared         : 0.918
Adj. R-squared    : 0.892
F-statistic       : 35
Prob (F-statistic): 0.0
No. Observations  : 1095
AIC               : 25561
Df Residuals      : 827
BIC               : 26900
RMSE (test)       : 62323

-------------------------

Maximum correlation between Reseduals and any data columns is 1.999455839599218e-12, with columns <LotArea>
Mean of train reseduals: -2.5779030128428924e-08

 ------------------------------------- Random Forest -------------------------------------


-------------------------

RF model peramters:

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'criterion': 'mse',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 200,
 'n_jobs': None,
 'oob_score': True,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

-------------------------

-------------------------

--- Model statistic ---
R^2 (test) : 0.8853590416339046
R^2 (train): 0.9797012301701824
RMSE (test): 28613
oob score  : 0.8474266006004882

-------------------------

Maximum correlation between Reseduals and any data columns is 0.4524305756076382, with columns <Exterior2nd__ImStucc>